1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2009, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/*******************************************************************************
7*
8* File CUCDTST.C
9*
10* Modification History:
11*        Name                     Description
12*     Madhu Katragadda            Ported for C API, added tests for string functions
13********************************************************************************
14*/
15
16#include <string.h>
17#include <math.h>
18#include <stdlib.h>
19
20#include "unicode/utypes.h"
21#include "unicode/uchar.h"
22#include "unicode/putil.h"
23#include "unicode/ustring.h"
24#include "unicode/uloc.h"
25
26#include "cintltst.h"
27#include "putilimp.h"
28#include "uparse.h"
29#include "ucase.h"
30#include "ubidi_props.h"
31#include "uprops.h"
32#include "uset_imp.h"
33#include "usc_impl.h"
34#include "unormimp.h"
35#include "udatamem.h" /* for testing ucase_openBinary() */
36#include "cucdapi.h"
37
/*
 * Number of elements of a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that it behaves as a single
 * primary expression wherever it is used (for example as a sizeof operand).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
39
/* prototypes --------------------------------------------------------------- */

/*
 * Test entry points with internal linkage.
 * Each of them is registered with the test framework by addUnicodeTest().
 */
static void TestUpperLower(void);
static void TestLetterNumber(void);
static void TestMisc(void);
static void TestPOSIX(void);
static void TestControlPrint(void);
static void TestIdentifier(void);
static void TestUnicodeData(void);
static void TestCodeUnit(void);
static void TestCodePoint(void);
static void TestCharLength(void);
static void TestCharNames(void);
static void TestMirroring(void);
static void TestUScriptRunAPI(void);
static void TestAdditionalProperties(void);
static void TestNumericProperties(void);
static void TestPropertyNames(void);
static void TestPropertyValues(void);
static void TestConsistency(void);
static void TestUCase(void);
static void TestUBiDiProps(void);
static void TestCaseFolding(void);

/*
 * internal methods used
 * Both take a string and return an int32_t; their definitions are not part
 * of this section of the file.
 */
static int32_t MakeProp(char* str);
static int32_t MakeDir(char* str);
67
68/* helpers ------------------------------------------------------------------ */
69
70static void
71parseUCDFile(const char *filename,
72             char *fields[][2], int32_t fieldCount,
73             UParseLineFn *lineFn, void *context,
74             UErrorCode *pErrorCode) {
75    char path[256];
76    char backupPath[256];
77
78    if(U_FAILURE(*pErrorCode)) {
79        return;
80    }
81
82    /* Look inside ICU_DATA first */
83    strcpy(path, u_getDataDirectory());
84    strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING);
85    strcat(path, filename);
86
87    /* As a fallback, try to guess where the source data was located
88     *    at the time ICU was built, and look there.
89     */
90    strcpy(backupPath, ctest_dataSrcDir());
91    strcat(backupPath, U_FILE_SEP_STRING);
92    strcat(backupPath, "unidata" U_FILE_SEP_STRING);
93    strcat(backupPath, filename);
94
95    u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorCode);
96    if(*pErrorCode==U_FILE_ACCESS_ERROR) {
97        *pErrorCode=U_ZERO_ERROR;
98        u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode);
99    }
100    if(U_FAILURE(*pErrorCode)) {
101        log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorName(*pErrorCode));
102    }
103}
104
105/* test data ---------------------------------------------------------------- */
106
/* NOTE(review): not used in the visible part of the file; presumably the last
 * code point handled when reading the data file - confirm against its users. */
static const UChar  LAST_CHAR_CODE_IN_FILE = 0xFFFD;

/*
 * Two-letter general category tags, concatenated without separators.
 * The tag at character offset 2*k corresponds to tagValues[k].
 */
static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf";

/* General category constants, in the same order as the tags in tagStrings. */
static const int32_t tagValues[] =
    {
    /* Mn */ U_NON_SPACING_MARK,
    /* Mc */ U_COMBINING_SPACING_MARK,
    /* Me */ U_ENCLOSING_MARK,
    /* Nd */ U_DECIMAL_DIGIT_NUMBER,
    /* Nl */ U_LETTER_NUMBER,
    /* No */ U_OTHER_NUMBER,
    /* Zs */ U_SPACE_SEPARATOR,
    /* Zl */ U_LINE_SEPARATOR,
    /* Zp */ U_PARAGRAPH_SEPARATOR,
    /* Cc */ U_CONTROL_CHAR,
    /* Cf */ U_FORMAT_CHAR,
    /* Cs */ U_SURROGATE,
    /* Co */ U_PRIVATE_USE_CHAR,
    /* Cn */ U_UNASSIGNED,
    /* Lu */ U_UPPERCASE_LETTER,
    /* Ll */ U_LOWERCASE_LETTER,
    /* Lt */ U_TITLECASE_LETTER,
    /* Lm */ U_MODIFIER_LETTER,
    /* Lo */ U_OTHER_LETTER,
    /* Pc */ U_CONNECTOR_PUNCTUATION,
    /* Pd */ U_DASH_PUNCTUATION,
    /* Ps */ U_START_PUNCTUATION,
    /* Pe */ U_END_PUNCTUATION,
    /* Po */ U_OTHER_PUNCTUATION,
    /* Sm */ U_MATH_SYMBOL,
    /* Sc */ U_CURRENCY_SYMBOL,
    /* Sk */ U_MODIFIER_SYMBOL,
    /* So */ U_OTHER_SYMBOL,
    /* Pi */ U_INITIAL_PUNCTUATION,
    /* Pf */ U_FINAL_PUNCTUATION
    };

/*
 * Short names of bidirectional classes; each entry has room for up to
 * four characters plus the terminating NUL.
 * NOTE(review): presumably indexed by the UCharDirection value - the order
 * must not be changed without checking the code that reads this table.
 */
static const char dirStrings[][5] = {
    "L",
    "R",
    "EN",
    "ES",
    "ET",
    "AN",
    "CS",
    "B",
    "S",
    "WS",
    "ON",
    "LRE",
    "LRO",
    "AL",
    "RLE",
    "RLO",
    "PDF",
    "NSM",
    "BN"
};
164
165void addUnicodeTest(TestNode** root);
166
167void addUnicodeTest(TestNode** root)
168{
169    addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit");
170    addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint");
171    addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength");
172    addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues");
173    addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData");
174    addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties");
175    addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties");
176    addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower");
177    addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber");
178    addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc");
179    addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX");
180    addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint");
181    addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier");
182    addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames");
183    addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring");
184    addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI");
185    addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI");
186    addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames");
187    addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues");
188    addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency");
189    addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase");
190    addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps");
191    addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding");
192}
193
194/*==================================================== */
195/* test u_toupper() and u_tolower()                    */
196/*==================================================== */
197static void TestUpperLower()
198{
199    const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000};
200    const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000};
201    U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21);
202    U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
203    int32_t i;
204
205    U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21);
206    U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21);
207
208/*
209Checks LetterLike Symbols which were previously a source of confusion
210[Bertrand A. D. 02/04/98]
211*/
212    for (i=0x2100;i<0x2138;i++)
213    {
214        /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */
215        if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132)
216        {
217            if (i != (int)u_tolower(i)) /* itself */
218                log_err("Failed case conversion with itself: U+%04x\n", i);
219            if (i != (int)u_toupper(i))
220                log_err("Failed case conversion with itself: U+%04x\n", i);
221        }
222    }
223
224    for(i=0; i < u_strlen(upper); i++){
225        if(u_tolower(upper[i]) != lower[i]){
226            log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i]));
227        }
228    }
229
230    log_verbose("testing upper lower\n");
231    for (i = 0; i < 21; i++) {
232
233        if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i]))
234        {
235            log_err("Failed isLowerCase test at  %c\n", upperTest[i]);
236        }
237        else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i]))
238         {
239            log_err("Failed isUpperCase test at %c\n", lowerTest[i]);
240        }
241        else if (upperTest[i] != u_tolower(lowerTest[i]))
242        {
243            log_err("Failed case conversion from %c  To %c :\n", lowerTest[i], upperTest[i]);
244        }
245        else if (lowerTest[i] != u_toupper(upperTest[i]))
246         {
247            log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]);
248        }
249        else if (upperTest[i] != u_tolower(upperTest[i]))
250        {
251            log_err("Failed case conversion with itself: %c\n", upperTest[i]);
252        }
253        else if (lowerTest[i] != u_toupper(lowerTest[i]))
254        {
255            log_err("Failed case conversion with itself: %c\n", lowerTest[i]);
256        }
257    }
258    log_verbose("done testing upper lower\n");
259
260    log_verbose("testing u_istitle\n");
261    {
262        static const UChar expected[] = {
263            0x1F88,
264            0x1F89,
265            0x1F8A,
266            0x1F8B,
267            0x1F8C,
268            0x1F8D,
269            0x1F8E,
270            0x1F8F,
271            0x1F88,
272            0x1F89,
273            0x1F8A,
274            0x1F8B,
275            0x1F8C,
276            0x1F8D,
277            0x1F8E,
278            0x1F8F,
279            0x1F98,
280            0x1F99,
281            0x1F9A,
282            0x1F9B,
283            0x1F9C,
284            0x1F9D,
285            0x1F9E,
286            0x1F9F,
287            0x1F98,
288            0x1F99,
289            0x1F9A,
290            0x1F9B,
291            0x1F9C,
292            0x1F9D,
293            0x1F9E,
294            0x1F9F,
295            0x1FA8,
296            0x1FA9,
297            0x1FAA,
298            0x1FAB,
299            0x1FAC,
300            0x1FAD,
301            0x1FAE,
302            0x1FAF,
303            0x1FA8,
304            0x1FA9,
305            0x1FAA,
306            0x1FAB,
307            0x1FAC,
308            0x1FAD,
309            0x1FAE,
310            0x1FAF,
311            0x1FBC,
312            0x1FBC,
313            0x1FCC,
314            0x1FCC,
315            0x1FFC,
316            0x1FFC,
317        };
318        int32_t num = sizeof(expected)/sizeof(expected[0]);
319        for(i=0; i<num; i++){
320            if(!u_istitle(expected[i])){
321                log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n",expected[i]);
322            }
323        }
324
325    }
326}
327
328/* compare two sets and verify that their difference or intersection is empty */
329static UBool
330showADiffB(const USet *a, const USet *b,
331           const char *a_name, const char *b_name,
332           UBool expect, UBool diffIsError) {
333    USet *aa;
334    int32_t i, start, end, length;
335    UErrorCode errorCode;
336
337    /*
338     * expect:
339     * TRUE  -> a-b should be empty, that is, b should contain all of a
340     * FALSE -> a&b should be empty, that is, a should contain none of b (and vice versa)
341     */
342    if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) {
343        return TRUE;
344    }
345
346    /* clone a to aa because a is const */
347    aa=uset_open(1, 0);
348    if(aa==NULL) {
349        /* unusual problem - out of memory? */
350        return FALSE;
351    }
352    uset_addAll(aa, a);
353
354    /* compute the set in question */
355    if(expect) {
356        /* a-b */
357        uset_removeAll(aa, b);
358    } else {
359        /* a&b */
360        uset_retainAll(aa, b);
361    }
362
363    /* aa is not empty because of the initial tests above; show its contents */
364    errorCode=U_ZERO_ERROR;
365    i=0;
366    for(;;) {
367        length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode);
368        if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
369            break; /* done */
370        }
371        if(U_FAILURE(errorCode)) {
372            log_err("error comparing %s with %s at difference item %d: %s\n",
373                a_name, b_name, i, u_errorName(errorCode));
374            break;
375        }
376        if(length!=0) {
377            break; /* done with code points, got a string or -1 */
378        }
379
380        if(diffIsError) {
381            if(expect) {
382                log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
383            } else {
384                log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
385            }
386        } else {
387            if(expect) {
388                log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name);
389            } else {
390                log_verbose("info: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end);
391            }
392        }
393
394        ++i;
395    }
396
397    uset_close(aa);
398    return FALSE;
399}
400
401static UBool
402showAMinusB(const USet *a, const USet *b,
403            const char *a_name, const char *b_name,
404            UBool diffIsError) {
405    return showADiffB(a, b, a_name, b_name, TRUE, diffIsError);
406}
407
408static UBool
409showAIntersectB(const USet *a, const USet *b,
410                const char *a_name, const char *b_name,
411                UBool diffIsError) {
412    return showADiffB(a, b, a_name, b_name, FALSE, diffIsError);
413}
414
415static UBool
416compareUSets(const USet *a, const USet *b,
417             const char *a_name, const char *b_name,
418             UBool diffIsError) {
419    /*
420     * Use an arithmetic & not a logical && so that both branches
421     * are always taken and all differences are shown.
422     */
423    return
424        showAMinusB(a, b, a_name, b_name, diffIsError) &
425        showAMinusB(b, a, b_name, a_name, diffIsError);
426}
427
428/* test isLetter(u_isapha()) and isDigit(u_isdigit()) */
429static void TestLetterNumber()
430{
431    UChar i = 0x0000;
432
433    log_verbose("Testing for isalpha\n");
434    for (i = 0x0041; i < 0x005B; i++) {
435        if (!u_isalpha(i))
436        {
437            log_err("Failed isLetter test at  %.4X\n", i);
438        }
439    }
440    for (i = 0x0660; i < 0x066A; i++) {
441        if (u_isalpha(i))
442        {
443            log_err("Failed isLetter test with numbers at %.4X\n", i);
444        }
445    }
446
447    log_verbose("Testing for isdigit\n");
448    for (i = 0x0660; i < 0x066A; i++) {
449        if (!u_isdigit(i))
450        {
451            log_verbose("Failed isNumber test at %.4X\n", i);
452        }
453    }
454
455    log_verbose("Testing for isalnum\n");
456    for (i = 0x0041; i < 0x005B; i++) {
457        if (!u_isalnum(i))
458        {
459            log_err("Failed isAlNum test at  %.4X\n", i);
460        }
461    }
462    for (i = 0x0660; i < 0x066A; i++) {
463        if (!u_isalnum(i))
464        {
465            log_err("Failed isAlNum test at  %.4X\n", i);
466        }
467    }
468
469    {
470        /*
471         * The following checks work only starting from Unicode 4.0.
472         * Check the version number here.
473         */
474        static UVersionInfo u401={ 4, 0, 1, 0 };
475        UVersionInfo version;
476        u_getUnicodeVersion(version);
477        if(version[0]<4 || 0==memcmp(version, u401, 4)) {
478            return;
479        }
480    }
481
482    {
483        /*
484         * Sanity check:
485         * Verify that exactly the digit characters have decimal digit values.
486         * This assumption is used in the implementation of u_digit()
487         * (which checks nt=de)
488         * compared with the parallel java.lang.Character.digit()
489         * (which checks Nd).
490         *
491         * This was not true in Unicode 3.2 and earlier.
492         * Unicode 4.0 fixed discrepancies.
493         * Unicode 4.0.1 re-introduced problems in this area due to an
494         * unintentionally incomplete last-minute change.
495         */
496        U_STRING_DECL(digitsPattern, "[:Nd:]", 6);
497        U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
498
499        USet *digits, *decimalValues;
500        UErrorCode errorCode;
501
502        U_STRING_INIT(digitsPattern, "[:Nd:]", 6);
503        U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24);
504        errorCode=U_ZERO_ERROR;
505        digits=uset_openPattern(digitsPattern, 6, &errorCode);
506        decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode);
507
508        if(U_SUCCESS(errorCode)) {
509            compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", TRUE);
510        }
511
512        uset_close(digits);
513        uset_close(decimalValues);
514    }
515}
516
517/* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_isspace()), isWhiteSpace(), u_CharDigitValue() */
518static void TestMisc()
519{
520    static const UChar sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005};
521    static const UChar sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74};
522    static const UChar sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6b };
523    static const UChar sampleDefined[] = {0x523E, 0x4f88, 0xfffd};
524    static const UChar sampleBase[] = {0x0061, 0x0031, 0x03d2};
525    static const UChar sampleNonBase[] = {0x002B, 0x0020, 0x203B};
526/*    static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/
527    static const UChar sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5};
528    static const UChar sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE};
529    static const UChar sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c};
530    static const UChar sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f};
531
532
533    static const int32_t sampleDigitValues[] = {0, 2, 3, 5};
534
535    uint32_t mask;
536
537    int32_t i;
538    char icuVersion[U_MAX_VERSION_STRING_LENGTH];
539    UVersionInfo realVersion;
540
541    memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH);
542
543    log_verbose("Testing for isspace and nonspaces\n");
544    for (i = 0; i < 5; i++) {
545        if (!(u_isspace(sampleSpaces[i])) ||
546                (u_isspace(sampleNonSpaces[i])))
547        {
548            log_err("Space char test error : %d or %d \n", (int32_t)sampleSpaces[i], (int32_t)sampleNonSpaces[i]);
549        }
550        if (!(u_isJavaSpaceChar(sampleSpaces[i])) ||
551                (u_isJavaSpaceChar(sampleNonSpaces[i])))
552        {
553            log_err("u_isJavaSpaceChar() test error : %d or %d \n", (int32_t)sampleSpaces[i], (int32_t)sampleNonSpaces[i]);
554        }
555    }
556
557    log_verbose("Testing for isspace and nonspaces\n");
558    for (i = 0; i < 5; i++) {
559        if (!(u_isWhitespace(sampleWhiteSpaces[i])) ||
560                (u_isWhitespace(sampleNonWhiteSpaces[i])))
561        {
562            log_err("White Space char test error : %lx or %lx \n", sampleWhiteSpaces[i], sampleNonWhiteSpaces[i]);
563        }
564    }
565
566    log_verbose("Testing for isdefined\n");
567    for (i = 0; i < 3; i++) {
568        if ((u_isdefined(sampleUndefined[i])) ||
569                !(u_isdefined(sampleDefined[i])))
570        {
571            log_err("Undefined char test error : U+%04x or U+%04x\n", (int32_t)sampleUndefined[i], (int32_t)sampleDefined[i]);
572        }
573    }
574
575    log_verbose("Testing for isbase\n");
576    for (i = 0; i < 3; i++) {
577        if ((u_isbase(sampleNonBase[i])) ||
578                !(u_isbase(sampleBase[i])))
579        {
580            log_err("Non-baseform char test error : U+%04x or U+%04x",(int32_t)sampleNonBase[i], (int32_t)sampleBase[i]);
581        }
582    }
583
584    log_verbose("Testing for isdigit \n");
585    for (i = 0; i < 4; i++) {
586        if ((u_isdigit(sampleDigits[i]) &&
587            (u_charDigitValue(sampleDigits[i])!= sampleDigitValues[i])) ||
588            (u_isdigit(sampleNonDigits[i]))) {
589            log_err("Digit char test error : %lx   or   %lx\n", sampleDigits[i], sampleNonDigits[i]);
590        }
591    }
592
593    /* Tests the ICU version #*/
594    u_getVersion(realVersion);
595    u_versionToString(realVersion, icuVersion);
596    if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0)
597    {
598        log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion);
599    }
600#if defined(ICU_VERSION)
601    /* test only happens where we have configure.in with VERSION - sanity check. */
602    if(strcmp(U_ICU_VERSION, ICU_VERSION))
603    {
604        log_err("ICU version mismatch: Header says %s, build environment says %s.\n",  U_ICU_VERSION, ICU_VERSION);
605    }
606#endif
607
608    /* test U_GC_... */
609    if(
610        U_GET_GC_MASK(0x41)!=U_GC_LU_MASK ||
611        U_GET_GC_MASK(0x662)!=U_GC_ND_MASK ||
612        U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK ||
613        U_GET_GC_MASK(0x28)!=U_GC_PS_MASK ||
614        U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK ||
615        U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK
616    ) {
617        log_err("error: U_GET_GC_MASK does not work properly\n");
618    }
619
620    mask=0;
621    mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK;
622
623    mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK;
624    mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK;
625    mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK;
626    mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK;
627    mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK;
628
629    mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK;
630    mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK;
631    mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK;
632
633    mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK;
634    mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK;
635    mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK;
636
637    mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK;
638    mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK;
639    mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK;
640
641    mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK;
642    mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK;
643    mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK;
644    mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK;
645
646    mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK;
647    mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK;
648    mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK;
649    mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK;
650    mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK;
651
652    mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK;
653    mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK;
654    mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK;
655    mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK;
656
657    mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK;
658    mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK;
659
660    if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
661        log_err("error: problems with U_GC_XX_MASK constants\n");
662    }
663
664    mask=0;
665    mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK;
666    mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK;
667    mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK;
668    mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK;
669    mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK;
670    mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK;
671    mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK;
672
673    if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) {
674        log_err("error: problems with U_GC_Y_MASK constants\n");
675    }
676    {
677        static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 };
678        for(i=0; i<10; i++){
679            if(digit[i]!=u_forDigit(i,10)){
680                log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10));
681            }
682        }
683    }
684
685    /* test u_digit() */
686    {
687        static const struct {
688            UChar32 c;
689            int8_t radix, value;
690        } data[]={
691            /* base 16 */
692            { 0x0031, 16, 1 },
693            { 0x0038, 16, 8 },
694            { 0x0043, 16, 12 },
695            { 0x0066, 16, 15 },
696            { 0x00e4, 16, -1 },
697            { 0x0662, 16, 2 },
698            { 0x06f5, 16, 5 },
699            { 0xff13, 16, 3 },
700            { 0xff41, 16, 10 },
701
702            /* base 8 */
703            { 0x0031, 8, 1 },
704            { 0x0038, 8, -1 },
705            { 0x0043, 8, -1 },
706            { 0x0066, 8, -1 },
707            { 0x00e4, 8, -1 },
708            { 0x0662, 8, 2 },
709            { 0x06f5, 8, 5 },
710            { 0xff13, 8, 3 },
711            { 0xff41, 8, -1 },
712
713            /* base 36 */
714            { 0x5a, 36, 35 },
715            { 0x7a, 36, 35 },
716            { 0xff3a, 36, 35 },
717            { 0xff5a, 36, 35 },
718
719            /* wrong radix values */
720            { 0x0031, 1, -1 },
721            { 0xff3a, 37, -1 }
722        };
723
724        for(i=0; i<LENGTHOF(data); ++i) {
725            if(u_digit(data[i].c, data[i].radix)!=data[i].value) {
726                log_err("u_digit(U+%04x, %d)=%d expected %d\n",
727                        data[i].c,
728                        data[i].radix,
729                        u_digit(data[i].c, data[i].radix),
730                        data[i].value);
731            }
732        }
733    }
734}
735
736/* test C/POSIX-style functions --------------------------------------------- */
737
/*
 * bit flags
 * One bit per C/POSIX-style classification function. The flag with the
 * value 1<<k belongs to posixClasses[k] below:
 * ISAL isalpha, ISLO islower, ISUP isupper, ISDI isdigit, ISXD isxdigit,
 * ISAN isalnum, ISPU ispunct, ISGR isgraph, ISPR isprint, ISSP isspace,
 * ISBL isblank, ISCN iscntrl.
 */
#define ISAL     1
#define ISLO     2
#define ISUP     4

#define ISDI     8
#define ISXD  0x10

#define ISAN  0x20

#define ISPU  0x40
#define ISGR  0x80
#define ISPR 0x100

#define ISSP 0x200
#define ISBL 0x400
#define ISCN 0x800

/* C/POSIX-style functions, in the same order as the bit flags */
typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c);

/* the function under test and its name (without the u_ prefix) for messages */
static const struct {
    IsPOSIXClass *fn;
    const char *name;
} posixClasses[]={
    { u_isalpha, "isalpha" },
    { u_islower, "islower" },
    { u_isupper, "isupper" },
    { u_isdigit, "isdigit" },
    { u_isxdigit, "isxdigit" },
    { u_isalnum, "isalnum" },
    { u_ispunct, "ispunct" },
    { u_isgraph, "isgraph" },
    { u_isprint, "isprint" },
    { u_isspace, "isspace" },
    { u_isblank, "isblank" },
    { u_iscntrl, "iscntrl" }
};

/*
 * Sample code points with the set of classification functions that are
 * expected to return TRUE for them; a function whose flag is absent is
 * expected to return FALSE.
 */
static const struct {
    UChar32 c;
    uint32_t posixResults;
} posixData[]={
    { 0x0008,                                                        ISCN },    /* backspace */
    { 0x0009,                                              ISSP|ISBL|ISCN },    /* TAB */
    { 0x000a,                                              ISSP|     ISCN },    /* LF */
    { 0x000c,                                              ISSP|     ISCN },    /* FF */
    { 0x000d,                                              ISSP|     ISCN },    /* CR */
    { 0x0020,                                         ISPR|ISSP|ISBL      },    /* space */
    { 0x0021,                               ISPU|ISGR|ISPR                },    /* ! */
    { 0x0033,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* 3 */
    { 0x0040,                               ISPU|ISGR|ISPR                },    /* @ */
    { 0x0041, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* A */
    { 0x007a, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* z */
    { 0x007b,                               ISPU|ISGR|ISPR                },    /* { */
    { 0x0085,                                              ISSP|     ISCN },    /* NEL */
    { 0x00a0,                                         ISPR|ISSP|ISBL      },    /* NBSP */
    { 0x00a4,                                    ISGR|ISPR                },    /* currency sign */
    { 0x00e4, ISAL|ISLO|               ISAN|     ISGR|ISPR                },    /* a-umlaut */
    { 0x0300,                                    ISGR|ISPR                },    /* combining grave */
    { 0x0600,                                                        ISCN },    /* arabic number sign */
    { 0x0627, ISAL|                    ISAN|     ISGR|ISPR                },    /* alef */
    { 0x0663,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* arabic 3 */
    { 0x2002,                                         ISPR|ISSP|ISBL      },    /* en space */
    { 0x2007,                                         ISPR|ISSP|ISBL      },    /* figure space */
    { 0x2009,                                         ISPR|ISSP|ISBL      },    /* thin space */
    { 0x200b,                                                        ISCN },    /* ZWSP */
  /*{ 0x200b,                                         ISPR|ISSP           },*/    /* ZWSP */ /* ZWSP became a control char in 4.0.1*/
    { 0x200e,                                                        ISCN },    /* LRM */
    { 0x2028,                                         ISPR|ISSP|     ISCN },    /* LS */
    { 0x2029,                                         ISPR|ISSP|     ISCN },    /* PS */
    { 0x20ac,                                    ISGR|ISPR                },    /* Euro */
    { 0xff15,                ISDI|ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth 5 */
    { 0xff25, ISAL|     ISUP|     ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth E */
    { 0xff35, ISAL|     ISUP|          ISAN|     ISGR|ISPR                },    /* fullwidth U */
    { 0xff45, ISAL|ISLO|          ISXD|ISAN|     ISGR|ISPR                },    /* fullwidth e */
    { 0xff55, ISAL|ISLO|               ISAN|     ISGR|ISPR                }     /* fullwidth u */
};
816
817static void
818TestPOSIX() {
819    uint32_t mask;
820    int32_t cl, i;
821    UBool expect;
822
823    mask=1;
824    for(cl=0; cl<12; ++cl) {
825        for(i=0; i<LENGTHOF(posixData); ++i) {
826            expect=(UBool)((posixData[i].posixResults&mask)!=0);
827            if(posixClasses[cl].fn(posixData[i].c)!=expect) {
828                log_err("u_%s(U+%04x)=%s is wrong\n",
829                    posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "TRUE");
830            }
831        }
832        mask<<=1;
833    }
834}
835
836/* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */
837static void TestControlPrint()
838{
839    const UChar sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b};
840    const UChar sampleNonControl[] = {0x61, 0x0031, 0x00e2};
841    const UChar samplePrintable[] = {0x0042, 0x005f, 0x2014};
842    const UChar sampleNonPrintable[] = {0x200c, 0x009f, 0x001b};
843    UChar32 c;
844    int i;
845
846    log_verbose("Testing for iscontrol\n");
847    for (i = 0; i < LENGTHOF(sampleControl); i++) {
848        if (!u_iscntrl(sampleControl[i]))
849        {
850            log_err("Control char test error : U+%04x should be control but is not\n", (int32_t)sampleControl[i]);
851        }
852    }
853
854    log_verbose("Testing for !iscontrol\n");
855    for (i = 0; i < LENGTHOF(sampleNonControl); i++) {
856        if (u_iscntrl(sampleNonControl[i]))
857        {
858            log_err("Control char test error : U+%04x should not be control but is\n", (int32_t)sampleNonControl[i]);
859        }
860    }
861
862    log_verbose("testing for isprintable\n");
863    for (i = 0; i < 3; i++) {
864        if (!u_isprint(samplePrintable[i]))
865        {
866            log_err("Printable char test error : U+%04x should be printable but is not\n", (int32_t)samplePrintable[i]);
867        }
868        if (u_isprint(sampleNonPrintable[i]))
869        {
870            log_err("Printable char test error : U+%04x should not be printable but is\n", (int32_t)sampleNonPrintable[i]);
871        }
872    }
873
874    /* test all ISO 8 controls */
875    for(c=0; c<=0x9f; ++c) {
876        if(c==0x20) {
877            /* skip ASCII graphic characters and continue with DEL */
878            c=0x7f;
879        }
880        if(!u_iscntrl(c)) {
881            log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c);
882        }
883        if(!u_isISOControl(c)) {
884            log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c);
885        }
886        if(u_isprint(c)) {
887            log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c);
888        }
889    }
890
891    /* test all Latin-1 graphic characters */
892    for(c=0x20; c<=0xff; ++c) {
893        if(c==0x7f) {
894            c=0xa0;
895        } else if(c==0xad) {
896            /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */
897            ++c;
898        }
899        if(!u_isprint(c)) {
900            log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n", c);
901        }
902    }
903}
904
905/* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/
906static void TestIdentifier()
907{
908    const UChar sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f};
909    const UChar sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082};
910    const UChar sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045};
911    const UChar sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020};
912    const UChar sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061};
913    const UChar sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019};
914    const UChar sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045};
915    const UChar sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020};
916    const UChar sampleIDIgnore[] = {0x0006, 0x0010, 0x206b};
917    const UChar sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061};
918
919    int i;
920
921    log_verbose("Testing sampleJavaID start \n");
922    for (i = 0; i < 3; i++) {
923        if (!(u_isJavaIDStart(sampleJavaIDStart[i])) ||
924                (u_isJavaIDStart(sampleNonJavaIDStart[i])))
925            log_err("Java ID Start char test error : %lx or %lx\n",
926            sampleJavaIDStart[i], sampleNonJavaIDStart[i]);
927    }
928
929    log_verbose("Testing sampleJavaID part \n");
930    for (i = 0; i < 3; i++) {
931        if (!(u_isJavaIDPart(sampleJavaIDPart[i])) ||
932                (u_isJavaIDPart(sampleNonJavaIDPart[i])))
933            log_err("Java ID Part char test error : %lx or %lx\n",
934             sampleJavaIDPart[i], sampleNonJavaIDPart[i]);
935    }
936
937    log_verbose("Testing sampleUnicodeID start \n");
938    for (i = 0; i < 3; i++) {
939        /* T_test_logln_ustr((int32_t)i); */
940        if (!(u_isIDStart(sampleUnicodeIDStart[i])) ||
941                (u_isIDStart(sampleNonUnicodeIDStart[i])))
942        {
943            log_err("Unicode ID Start char test error : %lx  or  %lx\n", sampleUnicodeIDStart[i],
944                                    sampleNonUnicodeIDStart[i]);
945        }
946    }
947
948    log_verbose("Testing sample unicode ID part \n");
949    for (i = 2; i < 3; i++) {   /* nos *** starts with 2 instead of 0, until clarified */
950        /* T_test_logln_ustr((int32_t)i); */
951        if (!(u_isIDPart(sampleUnicodeIDPart[i])) ||
952                (u_isIDPart(sampleNonUnicodeIDPart[i])))
953           {
954            log_err("Unicode ID Part char test error : %lx  or  %lx", sampleUnicodeIDPart[i], sampleNonUnicodeIDPart[i]);
955            }
956    }
957
958    log_verbose("Testing  sampleId ignore\n");
959    for (i = 0; i < 3; i++) {
960        /*T_test_logln_ustr((int32_t)i); */
961        if (!(u_isIDIgnorable(sampleIDIgnore[i])) ||
962                (u_isIDIgnorable(sampleNonIDIgnore[i])))
963        {
964            log_err("ID ignorable char test error : U+%04x  or  U+%04x\n", sampleIDIgnore[i], sampleNonIDIgnore[i]);
965        }
966    }
967}
968
969/* for each line of UnicodeData.txt, check some of the properties */
970/*
971 * ### TODO
972 * This test fails incorrectly if the First or Last code point of a repetitive area
973 * is overridden, which is allowed and is encouraged for the PUAs.
974 * Currently, this means that both area First/Last and override lines are
975 * tested against the properties from the API,
976 * and the area boundary will not match and cause an error.
977 *
978 * This function should detect area boundaries and skip them for the test of individual
979 * code points' properties.
980 * Then it should check that the areas contain all the same properties except where overridden.
981 * For this, it would have had to set a flag for which code points were listed explicitly.
982 */
983static void U_CALLCONV
984unicodeDataLineFn(void *context,
985                  char *fields[][2], int32_t fieldCount,
986                  UErrorCode *pErrorCode)
987{
988    char buffer[100];
989    char *end;
990    uint32_t value;
991    UChar32 c;
992    int32_t i;
993    int8_t type;
994
995    /* get the character code, field 0 */
996    c=strtoul(fields[0][0], &end, 16);
997    if(end<=fields[0][0] || end!=fields[0][1]) {
998        log_err("error: syntax error in field 0 at %s\n", fields[0][0]);
999        return;
1000    }
1001    if((uint32_t)c>=UCHAR_MAX_VALUE + 1) {
1002        log_err("error in UnicodeData.txt: code point %lu out of range\n", c);
1003        return;
1004    }
1005
1006    /* get general category, field 2 */
1007    *fields[2][1]=0;
1008    type = (int8_t)tagValues[MakeProp(fields[2][0])];
1009    if(u_charType(c)!=type) {
1010        log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type);
1011    }
1012    if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1013        log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1014    }
1015
1016    /* get canonical combining class, field 3 */
1017    value=strtoul(fields[3][0], &end, 10);
1018    if(end<=fields[3][0] || end!=fields[3][1]) {
1019        log_err("error: syntax error in field 3 at code 0x%lx\n", c);
1020        return;
1021    }
1022    if(value>255) {
1023        log_err("error in UnicodeData.txt: combining class %lu out of range\n", value);
1024        return;
1025    }
1026#if !UCONFIG_NO_NORMALIZATION
1027    if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) {
1028        log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value);
1029    }
1030#endif
1031
1032    /* get BiDi category, field 4 */
1033    *fields[4][1]=0;
1034    i=MakeDir(fields[4][0]);
1035    if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) {
1036        log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]);
1037    }
1038
1039    /* get ISO Comment, field 11 */
1040    *fields[11][1]=0;
1041    i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode);
1042    if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) {
1043        log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n",
1044            c, u_errorName(*pErrorCode),
1045            U_FAILURE(*pErrorCode) ? buffer : "[error]",
1046            fields[11][0]);
1047    }
1048
1049    /* get uppercase mapping, field 12 */
1050    if(fields[12][0]!=fields[12][1]) {
1051        value=strtoul(fields[12][0], &end, 16);
1052        if(end!=fields[12][1]) {
1053            log_err("error: syntax error in field 12 at code 0x%lx\n", c);
1054            return;
1055        }
1056        if((UChar32)value!=u_toupper(c)) {
1057            log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value);
1058        }
1059    } else {
1060        /* no case mapping: the API must map the code point to itself */
1061        if(c!=u_toupper(c)) {
1062            log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c));
1063        }
1064    }
1065
1066    /* get lowercase mapping, field 13 */
1067    if(fields[13][0]!=fields[13][1]) {
1068        value=strtoul(fields[13][0], &end, 16);
1069        if(end!=fields[13][1]) {
1070            log_err("error: syntax error in field 13 at code 0x%lx\n", c);
1071            return;
1072        }
1073        if((UChar32)value!=u_tolower(c)) {
1074            log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value);
1075        }
1076    } else {
1077        /* no case mapping: the API must map the code point to itself */
1078        if(c!=u_tolower(c)) {
1079            log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c));
1080        }
1081    }
1082
1083    /* get titlecase mapping, field 14 */
1084    if(fields[14][0]!=fields[14][1]) {
1085        value=strtoul(fields[14][0], &end, 16);
1086        if(end!=fields[14][1]) {
1087            log_err("error: syntax error in field 14 at code 0x%lx\n", c);
1088            return;
1089        }
1090        if((UChar32)value!=u_totitle(c)) {
1091            log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value);
1092        }
1093    } else {
1094        /* no case mapping: the API must map the code point to itself */
1095        if(c!=u_totitle(c)) {
1096            log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c));
1097        }
1098    }
1099}
1100
1101static UBool U_CALLCONV
1102enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1103    static const UChar32 test[][2]={
1104        {0x41, U_UPPERCASE_LETTER},
1105        {0x308, U_NON_SPACING_MARK},
1106        {0xfffe, U_GENERAL_OTHER_TYPES},
1107        {0xe0041, U_FORMAT_CHAR},
1108        {0xeffff, U_UNASSIGNED}
1109    };
1110
1111    int32_t i, count;
1112
1113    if(0!=strcmp((const char *)context, "a1")) {
1114        log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n");
1115        return FALSE;
1116    }
1117
1118    count=LENGTHOF(test);
1119    for(i=0; i<count; ++i) {
1120        if(start<=test[i][0] && test[i][0]<limit) {
1121            if(type!=(UCharCategory)test[i][1]) {
1122                log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n",
1123                        start, limit, (long)type, test[i][0], test[i][1]);
1124            }
1125            /* stop at the range that includes the last test code point (increases code coverage for enumeration) */
1126            return i==(count-1) ? FALSE : TRUE;
1127        }
1128    }
1129
1130    if(start>test[count-1][0]) {
1131        log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n",
1132                start, limit, (long)type);
1133        return FALSE;
1134    }
1135
1136    return TRUE;
1137}
1138
1139static UBool U_CALLCONV
1140enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) {
1141    /* default Bidi classes for unassigned code points */
1142    static const int32_t defaultBidi[][2]={ /* { limit, class } */
1143        { 0x0590, U_LEFT_TO_RIGHT },
1144        { 0x0600, U_RIGHT_TO_LEFT },
1145        { 0x07C0, U_RIGHT_TO_LEFT_ARABIC },
1146        { 0x0900, U_RIGHT_TO_LEFT },
1147        { 0xFB1D, U_LEFT_TO_RIGHT },
1148        { 0xFB50, U_RIGHT_TO_LEFT },
1149        { 0xFE00, U_RIGHT_TO_LEFT_ARABIC },
1150        { 0xFE70, U_LEFT_TO_RIGHT },
1151        { 0xFF00, U_RIGHT_TO_LEFT_ARABIC },
1152        { 0x10800, U_LEFT_TO_RIGHT },
1153        { 0x11000, U_RIGHT_TO_LEFT },
1154        { 0x110000, U_LEFT_TO_RIGHT }
1155    };
1156
1157    UChar32 c;
1158    int32_t i;
1159    UCharDirection shouldBeDir;
1160
1161    /*
1162     * LineBreak.txt specifies:
1163     *   #  - Assigned characters that are not listed explicitly are given the value
1164     *   #    "AL".
1165     *   #  - Unassigned characters are given the value "XX".
1166     *
1167     * PUA characters are listed explicitly with "XX".
1168     * Verify that no assigned character has "XX".
1169     */
1170    if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) {
1171        c=start;
1172        while(c<limit) {
1173            if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) {
1174                log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c);
1175            }
1176            ++c;
1177        }
1178    }
1179
1180    /*
1181     * Verify default Bidi classes.
1182     * For recent Unicode versions, see UCD.html.
1183     *
1184     * For older Unicode versions:
1185     * See table 3-7 "Bidirectional Character Types" in UAX #9.
1186     * http://www.unicode.org/reports/tr9/
1187     *
1188     * See also DerivedBidiClass.txt for Cn code points!
1189     *
1190     * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html)
1191     * changed some default values.
1192     * In particular, non-characters and unassigned Default Ignorable Code Points
1193     * change from L to BN.
1194     *
1195     * UCD.html version 4.0.1 does not yet reflect these changes.
1196     */
1197    if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) {
1198        /* enumerate the intersections of defaultBidi ranges with [start..limit[ */
1199        c=start;
1200        for(i=0; i<LENGTHOF(defaultBidi) && c<limit; ++i) {
1201            if((int32_t)c<defaultBidi[i][0]) {
1202                while(c<limit && (int32_t)c<defaultBidi[i][0]) {
1203                    if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) {
1204                        shouldBeDir=U_BOUNDARY_NEUTRAL;
1205                    } else {
1206                        shouldBeDir=(UCharDirection)defaultBidi[i][1];
1207                    }
1208
1209                    if( u_charDirection(c)!=shouldBeDir ||
1210                        u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir
1211                    ) {
1212                        log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n",
1213                            c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]);
1214                    }
1215                    ++c;
1216                }
1217            }
1218        }
1219    }
1220
1221    return TRUE;
1222}
1223
1224/* tests for several properties */
1225static void TestUnicodeData()
1226{
1227    UVersionInfo expectVersionArray;
1228    UVersionInfo versionArray;
1229    char *fields[15][2];
1230    UErrorCode errorCode;
1231    UChar32 c;
1232    int8_t type;
1233
1234    u_versionFromString(expectVersionArray, U_UNICODE_VERSION);
1235    u_getUnicodeVersion(versionArray);
1236    if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0)
1237    {
1238        log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n",
1239        versionArray[0], versionArray[1], versionArray[2], versionArray[3]);
1240    }
1241
1242#if defined(ICU_UNICODE_VERSION)
1243    /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */
1244    if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION))
1245    {
1246         log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n");
1247    }
1248#endif
1249
1250    if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) {
1251        log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041));
1252    }
1253
1254    errorCode=U_ZERO_ERROR;
1255    parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, NULL, &errorCode);
1256    if(U_FAILURE(errorCode)) {
1257        return; /* if we couldn't parse UnicodeData.txt, we should return */
1258    }
1259
1260    /* sanity check on repeated properties */
1261    for(c=0xfffe; c<=0x10ffff;) {
1262        type=u_charType(c);
1263        if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1264            log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1265        }
1266        if(type!=U_UNASSIGNED) {
1267            log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c));
1268        }
1269        if((c&0xffff)==0xfffe) {
1270            ++c;
1271        } else {
1272            c+=0xffff;
1273        }
1274    }
1275
1276    /* test that PUA is not "unassigned" */
1277    for(c=0xe000; c<=0x10fffd;) {
1278        type=u_charType(c);
1279        if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) {
1280            log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c);
1281        }
1282        if(type==U_UNASSIGNED) {
1283            log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c);
1284        } else if(type!=U_PRIVATE_USE_CHAR) {
1285            log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type);
1286        }
1287        if(c==0xf8ff) {
1288            c=0xf0000;
1289        } else if(c==0xffffd) {
1290            c=0x100000;
1291        } else {
1292            ++c;
1293        }
1294    }
1295
1296    /* test u_enumCharTypes() */
1297    u_enumCharTypes(enumTypeRange, "a1");
1298
1299    /* check default properties */
1300    u_enumCharTypes(enumDefaultsRange, NULL);
1301}
1302
1303static void TestCodeUnit(){
1304    const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0};
1305
1306    int32_t i;
1307
1308    for(i=0; i<(int32_t)(sizeof(codeunit)/sizeof(codeunit[0])); i++){
1309        UChar c=codeunit[i];
1310        if(i<4){
1311            if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){
1312                log_err("ERROR: U+%04x is a single", c);
1313            }
1314
1315        }
1316        if(i >= 4 && i< 8){
1317            if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){
1318                log_err("ERROR: U+%04x is a first surrogate", c);
1319            }
1320        }
1321        if(i >= 8 && i< 12){
1322            if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){
1323                log_err("ERROR: U+%04x is a second surrogate", c);
1324            }
1325        }
1326    }
1327
1328}
1329
1330static void TestCodePoint(){
1331    const UChar32 codePoint[]={
1332        /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */
1333        0xd800,
1334        0xdbff,
1335        0xdc00,
1336        0xdfff,
1337        0xdc04,
1338        0xd821,
1339        /*not a surrogate, valid, isUnicodeChar , not Error*/
1340        0x20ac,
1341        0xd7ff,
1342        0xe000,
1343        0xe123,
1344        0x0061,
1345        0xe065,
1346        0x20402,
1347        0x24506,
1348        0x23456,
1349        0x20402,
1350        0x10402,
1351        0x23456,
1352        /*not a surrogate, not valid, isUnicodeChar, isError */
1353        0x0015,
1354        0x009f,
1355        /*not a surrogate, not valid, not isUnicodeChar, isError */
1356        0xffff,
1357        0xfffe,
1358    };
1359    int32_t i;
1360    for(i=0; i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0])); i++){
1361        UChar32 c=codePoint[i];
1362        if(i<6){
1363            if(!UTF_IS_SURROGATE(c) || !U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)){
1364                log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1365            }
1366            if(UTF_IS_VALID(c)){
1367                log_err("ERROR: isValid() failed for U+%04x\n", c);
1368            }
1369            if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1370                log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1371            }
1372            if(UTF_IS_ERROR(c)){
1373                log_err("ERROR: isError() failed for U+%04x\n", c);
1374            }
1375        }else if(i >=6 && i<18){
1376            if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1377                log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1378            }
1379            if(!UTF_IS_VALID(c)){
1380                log_err("ERROR: isValid() failed for U+%04x\n", c);
1381            }
1382            if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1383                log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1384            }
1385            if(UTF_IS_ERROR(c)){
1386                log_err("ERROR: isError() failed for U+%04x\n", c);
1387            }
1388        }else if(i >=18 && i<20){
1389            if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1390                log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1391            }
1392            if(UTF_IS_VALID(c)){
1393                log_err("ERROR: isValid() failed for U+%04x\n", c);
1394            }
1395            if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){
1396                log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1397            }
1398            if(!UTF_IS_ERROR(c)){
1399                log_err("ERROR: isError() failed for U+%04x\n", c);
1400            }
1401        }
1402        else if(i >=18 && i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0]))){
1403            if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){
1404                log_err("ERROR: isSurrogate() failed for U+%04x\n", c);
1405            }
1406            if(UTF_IS_VALID(c)){
1407                log_err("ERROR: isValid() failed for U+%04x\n", c);
1408            }
1409            if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){
1410                log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c);
1411            }
1412            if(!UTF_IS_ERROR(c)){
1413                log_err("ERROR: isError() failed for U+%04x\n", c);
1414            }
1415        }
1416    }
1417
1418    if(
1419        !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) ||
1420        !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) ||
1421        U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) ||
1422        U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff)
1423    ) {
1424        log_err("error with U_IS_BMP()\n");
1425    }
1426
1427    if(
1428        U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) ||
1429        U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) ||
1430        U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) ||
1431        !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff)
1432    ) {
1433        log_err("error with U_IS_SUPPLEMENTARY()\n");
1434    }
1435}
1436
1437static void TestCharLength()
1438{
1439    const int32_t codepoint[]={
1440        1, 0x0061,
1441        1, 0xe065,
1442        1, 0x20ac,
1443        2, 0x20402,
1444        2, 0x23456,
1445        2, 0x24506,
1446        2, 0x20402,
1447        2, 0x10402,
1448        1, 0xd7ff,
1449        1, 0xe000
1450    };
1451
1452    int32_t i;
1453    UBool multiple;
1454    for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+2)){
1455        UChar32 c=codepoint[i+1];
1456        if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){
1457            log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], UTF_CHAR_LENGTH(c));
1458        }
1459        multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE);
1460        if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){
1461            log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c);
1462        }
1463    }
1464}
1465
1466/*internal functions ----*/
1467static int32_t MakeProp(char* str)
1468{
1469    int32_t result = 0;
1470    char* matchPosition =0;
1471
1472    matchPosition = strstr(tagStrings, str);
1473    if (matchPosition == 0)
1474    {
1475        log_err("unrecognized type letter ");
1476        log_err(str);
1477    }
1478    else
1479        result = (int32_t)((matchPosition - tagStrings) / 2);
1480    return result;
1481}
1482
1483static int32_t MakeDir(char* str)
1484{
1485    int32_t pos = 0;
1486    for (pos = 0; pos < 19; pos++) {
1487        if (strcmp(str, dirStrings[pos]) == 0) {
1488            return pos;
1489        }
1490    }
1491    return -1;
1492}
1493
1494/* test u_charName() -------------------------------------------------------- */
1495
/*
 * Sample code points with their expected character names.
 * The tests in this file compare
 *   name    with the U_UNICODE_CHAR_NAME result,
 *   oldName with the U_UNICODE_10_CHAR_NAME result
 *           (an empty string means that no Unicode 1.0 name is expected),
 *   extName with the U_EXTENDED_CHAR_NAME result.
 */
static const struct {
    uint32_t code;
    const char *name, *oldName, *extName;
} names[]={
    {0x0061, "LATIN SMALL LETTER A", "", "LATIN SMALL LETTER A"},
    {0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "LATIN SMALL LETTER DOTLESS J BAR HOOK", "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK" },
    {0x3401, "CJK UNIFIED IDEOGRAPH-3401", "", "CJK UNIFIED IDEOGRAPH-3401" },
    {0x7fed, "CJK UNIFIED IDEOGRAPH-7FED", "", "CJK UNIFIED IDEOGRAPH-7FED" },
    {0xac00, "HANGUL SYLLABLE GA", "", "HANGUL SYLLABLE GA" },
    {0xd7a3, "HANGUL SYLLABLE HIH", "", "HANGUL SYLLABLE HIH" },
    /* surrogates and noncharacters: empty regular name, bracketed extended name */
    {0xd800, "", "", "<lead surrogate-D800>" },
    {0xdc00, "", "", "<trail surrogate-DC00>" },
    {0xff08, "FULLWIDTH LEFT PARENTHESIS", "FULLWIDTH OPENING PARENTHESIS", "FULLWIDTH LEFT PARENTHESIS" },
    {0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN" },
    {0xffff, "", "", "<noncharacter-FFFF>" },
    {0x23456, "CJK UNIFIED IDEOGRAPH-23456", "", "CJK UNIFIED IDEOGRAPH-23456" }
};
1513
1514static UBool
1515enumCharNamesFn(void *context,
1516                UChar32 code, UCharNameChoice nameChoice,
1517                const char *name, int32_t length) {
1518    int32_t *pCount=(int32_t *)context;
1519    int i;
1520
1521    if(length<=0 || length!=(int32_t)strlen(name)) {
1522        /* should not be called with an empty string or invalid length */
1523        log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length);
1524        return TRUE;
1525    }
1526
1527    ++*pCount;
1528    for(i=0; i<sizeof(names)/sizeof(names[0]); ++i) {
1529        if(code==(UChar32)names[i].code) {
1530            switch (nameChoice) {
1531                case U_EXTENDED_CHAR_NAME:
1532                    if(0!=strcmp(name, names[i].extName)) {
1533                        log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName);
1534                    }
1535                    break;
1536                case U_UNICODE_CHAR_NAME:
1537                    if(0!=strcmp(name, names[i].name)) {
1538                        log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name);
1539                    }
1540                    break;
1541                case U_UNICODE_10_CHAR_NAME:
1542                    if(names[i].oldName[0]==0 || 0!=strcmp(name, names[i].oldName)) {
1543                        log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, names[i].oldName);
1544                    }
1545                    break;
1546                case U_CHAR_NAME_CHOICE_COUNT:
1547                    break;
1548            }
1549            break;
1550        }
1551    }
1552    return TRUE;
1553}
1554
/* State shared between the calls of enumExtCharNamesFn() during one enumeration. */
struct enumExtCharNamesContext {
    /* counter whose address enumExtCharNamesFn() passes to enumCharNamesFn() as its context */
    uint32_t length;
    /* code point seen by the previous callback; the next one is expected to be last+1 */
    int32_t last;
};
1559
1560static UBool
1561enumExtCharNamesFn(void *context,
1562                UChar32 code, UCharNameChoice nameChoice,
1563                const char *name, int32_t length) {
1564    struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context;
1565
1566    if (ecncp->last != (int32_t) code - 1) {
1567        if (ecncp->last < 0) {
1568            log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1);
1569        } else {
1570            log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code);
1571        }
1572    }
1573    ecncp->last = (int32_t) code;
1574
1575    if (!*name) {
1576        log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code);
1577    }
1578
1579    return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length);
1580}
1581
1582/**
1583 * This can be made more efficient by moving it into putil.c and having
1584 * it directly access the ebcdic translation tables.
1585 * TODO: If we get this method in putil.c, then delete it from here.
1586 */
1587static UChar
1588u_charToUChar(char c) {
1589    UChar uc;
1590    u_charsToUChars(&c, &uc, 1);
1591    return uc;
1592}
1593
1594static void
1595TestCharNames() {
1596    static char name[80];
1597    UErrorCode errorCode=U_ZERO_ERROR;
1598    struct enumExtCharNamesContext extContext;
1599    int32_t length;
1600    UChar32 c;
1601    int32_t i;
1602
1603    log_verbose("Testing uprv_getMaxCharNameLength()\n");
1604    length=uprv_getMaxCharNameLength();
1605    if(length==0) {
1606        /* no names data available */
1607        return;
1608    }
1609    if(length<83) { /* Unicode 3.2 max char name length */
1610        log_err("uprv_getMaxCharNameLength()=%d is too short");
1611    }
1612    /* ### TODO same tests for max ISO comment length as for max name length */
1613
1614    log_verbose("Testing u_charName()\n");
1615    for(i=0; i<(int32_t)(sizeof(names)/sizeof(names[0])); ++i) {
1616        /* modern Unicode character name */
1617        length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode);
1618        if(U_FAILURE(errorCode)) {
1619            log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode));
1620            return;
1621        }
1622        if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) {
1623            log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name);
1624        }
1625
1626        /* find the modern name */
1627        if (*names[i].name) {
1628            c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode);
1629            if(U_FAILURE(errorCode)) {
1630                log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode));
1631                return;
1632            }
1633            if(c!=(UChar32)names[i].code) {
1634                log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code);
1635            }
1636        }
1637
1638        /* Unicode 1.0 character name */
1639        length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode);
1640        if(U_FAILURE(errorCode)) {
1641            log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode));
1642            return;
1643        }
1644        if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) {
1645            log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName);
1646        }
1647
1648        /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */
1649        if(names[i].oldName[0]!=0 /* && length>0 */) {
1650            c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode);
1651            if(U_FAILURE(errorCode)) {
1652                log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode));
1653                return;
1654            }
1655            if(c!=(UChar32)names[i].code) {
1656                log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code);
1657            }
1658        }
1659    }
1660
1661    /* test u_enumCharNames() */
1662    length=0;
1663    errorCode=U_ZERO_ERROR;
1664    u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode);
1665    if(U_FAILURE(errorCode) || length<94140) {
1666        log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length);
1667    }
1668
1669    extContext.length = 0;
1670    extContext.last = -1;
1671    errorCode=U_ZERO_ERROR;
1672    u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode);
1673    if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) {
1674        log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length);
1675    }
1676
1677    /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */
1678    if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) {
1679        log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode));
1680    }
1681
1682    /* Test getCharNameCharacters */
1683    if(!QUICK) {
1684        enum { BUFSIZE = 256 };
1685        UErrorCode ec = U_ZERO_ERROR;
1686        char buf[BUFSIZE];
1687        int32_t maxLength;
1688        UChar32 cp;
1689        UChar pat[BUFSIZE], dumbPat[BUFSIZE];
1690        int32_t l1, l2;
1691        UBool map[256];
1692        UBool ok;
1693
1694        USet* set = uset_open(1, 0); /* empty set */
1695        USet* dumb = uset_open(1, 0); /* empty set */
1696
1697        /*
1698         * uprv_getCharNameCharacters() will likely return more lowercase
1699         * letters than actual character names contain because
1700         * it includes all the characters in lowercased names of
1701         * general categories, for the full possible set of extended names.
1702         */
1703        {
1704            USetAdder sa={
1705                NULL,
1706                uset_add,
1707                uset_addRange,
1708                uset_addString,
1709                NULL /* don't need remove() */
1710            };
1711            sa.set=set;
1712            uprv_getCharNameCharacters(&sa);
1713        }
1714
1715        /* build set the dumb (but sure-fire) way */
1716        for (i=0; i<256; ++i) {
1717            map[i] = FALSE;
1718        }
1719
1720        maxLength=0;
1721        for (cp=0; cp<0x110000; ++cp) {
1722            int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME,
1723                                     buf, BUFSIZE, &ec);
1724            if (U_FAILURE(ec)) {
1725                log_err("FAIL: u_charName failed when it shouldn't\n");
1726                uset_close(set);
1727                uset_close(dumb);
1728                return;
1729            }
1730            if(len>maxLength) {
1731                maxLength=len;
1732            }
1733
1734            for (i=0; i<len; ++i) {
1735                if (!map[(uint8_t) buf[i]]) {
1736                    uset_add(dumb, (UChar32)u_charToUChar(buf[i]));
1737                    map[(uint8_t) buf[i]] = TRUE;
1738                }
1739            }
1740
1741            /* test for leading/trailing whitespace */
1742            if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') {
1743                log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp);
1744            }
1745        }
1746
1747        if(map[(uint8_t)'\t']) {
1748            log_err("u_charName() returned a name with a TAB for some code point\n", cp);
1749        }
1750
1751        length=uprv_getMaxCharNameLength();
1752        if(length!=maxLength) {
1753            log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n",
1754                    length, maxLength);
1755        }
1756
1757        /* compare the sets.  Where is my uset_equals?!! */
1758        ok=TRUE;
1759        for(i=0; i<256; ++i) {
1760            if(uset_contains(set, i)!=uset_contains(dumb, i)) {
1761                if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) {
1762                    /* ignore lowercase a-z that are in set but not in dumb */
1763                    ok=TRUE;
1764                } else {
1765                    ok=FALSE;
1766                    break;
1767                }
1768            }
1769        }
1770
1771        l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec);
1772        l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec);
1773        if (U_FAILURE(ec)) {
1774            log_err("FAIL: uset_toPattern failed when it shouldn't\n");
1775            uset_close(set);
1776            uset_close(dumb);
1777            return;
1778        }
1779
1780        if (l1 >= BUFSIZE) {
1781            l1 = BUFSIZE-1;
1782            pat[l1] = 0;
1783        }
1784        if (l2 >= BUFSIZE) {
1785            l2 = BUFSIZE-1;
1786            dumbPat[l2] = 0;
1787        }
1788
1789        if (!ok) {
1790            log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n",
1791                    aescstrdup(pat, l1), aescstrdup(dumbPat, l2));
1792        } else if(VERBOSITY) {
1793            log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1));
1794        }
1795
1796        uset_close(set);
1797        uset_close(dumb);
1798    }
1799
1800    /* ### TODO: test error cases and other interesting things */
1801}
1802
1803/* test u_isMirrored() and u_charMirror() ----------------------------------- */
1804
1805static void
1806TestMirroring() {
1807    USet *set;
1808    UErrorCode errorCode;
1809
1810    UChar32 start, end, c2, c3;
1811    int32_t i;
1812
1813    U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17);
1814
1815    U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17);
1816
1817    log_verbose("Testing u_isMirrored()\n");
1818    if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) &&
1819         !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400)
1820        )
1821    ) {
1822        log_err("u_isMirrored() does not work correctly\n");
1823    }
1824
1825    log_verbose("Testing u_charMirror()\n");
1826    if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 &&
1827         u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */
1828         u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab &&
1829         /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
1830         u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d
1831         )
1832    ) {
1833        log_err("u_charMirror() does not work correctly\n");
1834    }
1835
1836    /* verify that Bidi_Mirroring_Glyph roundtrips */
1837    errorCode=U_ZERO_ERROR;
1838    set=uset_openPattern(mirroredPattern, 17, &errorCode);
1839
1840    if (U_FAILURE(errorCode)) {
1841        log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\n");
1842    } else {
1843        for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) {
1844            do {
1845                c2=u_charMirror(start);
1846                c3=u_charMirror(c2);
1847                if(c3!=start) {
1848                    log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3);
1849                }
1850            } while(++start<=end);
1851        }
1852    }
1853
1854    uset_close(set);
1855}
1856
1857
1858struct RunTestData
1859{
1860    const char *runText;
1861    UScriptCode runCode;
1862};
1863
1864typedef struct RunTestData RunTestData;
1865
1866static void
1867CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns,
1868                const char *prefix)
1869{
1870    int32_t run, runStart, runLimit;
1871    UScriptCode runCode;
1872
1873    /* iterate over all the runs */
1874    run = 0;
1875    while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) {
1876        if (runStart != runStarts[run]) {
1877            log_err("%s: incorrect start offset for run %d: expected %d, got %d\n",
1878                prefix, run, runStarts[run], runStart);
1879        }
1880
1881        if (runLimit != runStarts[run + 1]) {
1882            log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n",
1883                prefix, run, runStarts[run + 1], runLimit);
1884        }
1885
1886        if (runCode != testData[run].runCode) {
1887            log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n",
1888                prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode));
1889        }
1890
1891        run += 1;
1892
1893        /* stop when we've seen all the runs we expect to see */
1894        if (run >= nRuns) {
1895            break;
1896        }
1897    }
1898
1899    /* Complain if we didn't see then number of runs we expected */
1900    if (run != nRuns) {
1901        log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns);
1902    }
1903}
1904
1905static void
1906TestUScriptRunAPI()
1907{
1908    static const RunTestData testData1[] = {
1909        {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI},
1910        {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC},
1911        {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC},
1912        {"English (", USCRIPT_LATIN},
1913        {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI},
1914        {") ", USCRIPT_LATIN},
1915        {"\\u6F22\\u5B75", USCRIPT_HAN},
1916        {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA},
1917        {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA},
1918        {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET}
1919    };
1920
1921    static const RunTestData testData2[] = {
1922       {"((((((((((abc))))))))))", USCRIPT_LATIN}
1923    };
1924
1925    static const struct {
1926      const RunTestData *testData;
1927      int32_t nRuns;
1928    } testDataEntries[] = {
1929        {testData1, LENGTHOF(testData1)},
1930        {testData2, LENGTHOF(testData2)}
1931    };
1932
1933    static const int32_t nTestEntries = LENGTHOF(testDataEntries);
1934    int32_t testEntry;
1935
1936    for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) {
1937        UChar testString[1024];
1938        int32_t runStarts[256];
1939        int32_t nTestRuns = testDataEntries[testEntry].nRuns;
1940        const RunTestData *testData = testDataEntries[testEntry].testData;
1941
1942        int32_t run, stringLimit;
1943        UScriptRun *scriptRun = NULL;
1944        UErrorCode err;
1945
1946        /*
1947         * Fill in the test string and the runStarts array.
1948         */
1949        stringLimit = 0;
1950        for (run = 0; run < nTestRuns; run += 1) {
1951            runStarts[run] = stringLimit;
1952            stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit);
1953            /*stringLimit -= 1;*/
1954        }
1955
1956        /* The limit of the last run */
1957        runStarts[nTestRuns] = stringLimit;
1958
1959        /*
1960         * Make sure that calling uscript_OpenRun with a NULL text pointer
1961         * and a non-zero text length returns the correct error.
1962         */
1963        err = U_ZERO_ERROR;
1964        scriptRun = uscript_openRun(NULL, stringLimit, &err);
1965
1966        if (err != U_ILLEGAL_ARGUMENT_ERROR) {
1967            log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
1968        }
1969
1970        if (scriptRun != NULL) {
1971            log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n");
1972            uscript_closeRun(scriptRun);
1973        }
1974
1975        /*
1976         * Make sure that calling uscript_OpenRun with a non-NULL text pointer
1977         * and a zero text length returns the correct error.
1978         */
1979        err = U_ZERO_ERROR;
1980        scriptRun = uscript_openRun(testString, 0, &err);
1981
1982        if (err != U_ILLEGAL_ARGUMENT_ERROR) {
1983            log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
1984        }
1985
1986        if (scriptRun != NULL) {
1987            log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n");
1988            uscript_closeRun(scriptRun);
1989        }
1990
1991        /*
1992         * Make sure that calling uscript_openRun with a NULL text pointer
1993         * and a zero text length doesn't return an error.
1994         */
1995        err = U_ZERO_ERROR;
1996        scriptRun = uscript_openRun(NULL, 0, &err);
1997
1998        if (U_FAILURE(err)) {
1999            log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err));
2000        }
2001
2002        /* Make sure that the empty iterator doesn't find any runs */
2003        if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) {
2004            log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n");
2005        }
2006
2007        /*
2008         * Make sure that calling uscript_setRunText with a NULL text pointer
2009         * and a non-zero text length returns the correct error.
2010         */
2011        err = U_ZERO_ERROR;
2012        uscript_setRunText(scriptRun, NULL, stringLimit, &err);
2013
2014        if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2015            log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2016        }
2017
2018        /*
2019         * Make sure that calling uscript_OpenRun with a non-NULL text pointer
2020         * and a zero text length returns the correct error.
2021         */
2022        err = U_ZERO_ERROR;
2023        uscript_setRunText(scriptRun, testString, 0, &err);
2024
2025        if (err != U_ILLEGAL_ARGUMENT_ERROR) {
2026            log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err));
2027        }
2028
2029        /*
2030         * Now call uscript_setRunText on the empty iterator
2031         * and make sure that it works.
2032         */
2033        err = U_ZERO_ERROR;
2034        uscript_setRunText(scriptRun, testString, stringLimit, &err);
2035
2036        if (U_FAILURE(err)) {
2037            log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err));
2038        } else {
2039            CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText");
2040        }
2041
2042        uscript_closeRun(scriptRun);
2043
2044        /*
2045         * Now open an interator over the testString
2046         * using uscript_openRun and make sure that it works
2047         */
2048        scriptRun = uscript_openRun(testString, stringLimit, &err);
2049
2050        if (U_FAILURE(err)) {
2051            log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err));
2052        } else {
2053            CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun");
2054        }
2055
2056        /* Now reset the iterator, and make sure
2057         * that it still works.
2058         */
2059        uscript_resetRun(scriptRun);
2060
2061        CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun");
2062
2063        /* Close the iterator */
2064        uscript_closeRun(scriptRun);
2065    }
2066}
2067
2068/* test additional, non-core properties */
2069static void
2070TestAdditionalProperties() {
2071    /* test data for u_charAge() */
2072    static const struct {
2073        UChar32 c;
2074        UVersionInfo version;
2075    } charAges[]={
2076        {0x41,    { 1, 1, 0, 0 }},
2077        {0xffff,  { 1, 1, 0, 0 }},
2078        {0x20ab,  { 2, 0, 0, 0 }},
2079        {0x2fffe, { 2, 0, 0, 0 }},
2080        {0x20ac,  { 2, 1, 0, 0 }},
2081        {0xfb1d,  { 3, 0, 0, 0 }},
2082        {0x3f4,   { 3, 1, 0, 0 }},
2083        {0x10300, { 3, 1, 0, 0 }},
2084        {0x220,   { 3, 2, 0, 0 }},
2085        {0xff60,  { 3, 2, 0, 0 }}
2086    };
2087
2088    /* test data for u_hasBinaryProperty() */
2089    static const int32_t
2090    props[][3]={ /* code point, property, value */
2091        { 0x0627, UCHAR_ALPHABETIC, TRUE },
2092        { 0x1034a, UCHAR_ALPHABETIC, TRUE },
2093        { 0x2028, UCHAR_ALPHABETIC, FALSE },
2094
2095        { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE },
2096        { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE },
2097
2098        { 0x202c, UCHAR_BIDI_CONTROL, TRUE },
2099        { 0x202f, UCHAR_BIDI_CONTROL, FALSE },
2100
2101        { 0x003c, UCHAR_BIDI_MIRRORED, TRUE },
2102        { 0x003d, UCHAR_BIDI_MIRRORED, FALSE },
2103
2104        /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
2105        { 0x2018, UCHAR_BIDI_MIRRORED, FALSE },
2106        { 0x201d, UCHAR_BIDI_MIRRORED, FALSE },
2107        { 0x201f, UCHAR_BIDI_MIRRORED, FALSE },
2108        { 0x301e, UCHAR_BIDI_MIRRORED, FALSE },
2109
2110        { 0x058a, UCHAR_DASH, TRUE },
2111        { 0x007e, UCHAR_DASH, FALSE },
2112
2113        { 0x0c4d, UCHAR_DIACRITIC, TRUE },
2114        { 0x3000, UCHAR_DIACRITIC, FALSE },
2115
2116        { 0x0e46, UCHAR_EXTENDER, TRUE },
2117        { 0x0020, UCHAR_EXTENDER, FALSE },
2118
2119#if !UCONFIG_NO_NORMALIZATION
2120        { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2121        { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE },
2122        { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE },
2123
2124        { 0x110a, UCHAR_NFD_INERT, TRUE },      /* Jamo L */
2125        { 0x0308, UCHAR_NFD_INERT, FALSE },
2126
2127        { 0x1164, UCHAR_NFKD_INERT, TRUE },     /* Jamo V */
2128        { 0x1d79d, UCHAR_NFKD_INERT, FALSE },   /* math compat version of xi */
2129
2130        { 0x0021, UCHAR_NFC_INERT, TRUE },      /* ! */
2131        { 0x0061, UCHAR_NFC_INERT, FALSE },     /* a */
2132        { 0x00e4, UCHAR_NFC_INERT, FALSE },     /* a-umlaut */
2133        { 0x0102, UCHAR_NFC_INERT, FALSE },     /* a-breve */
2134        { 0xac1c, UCHAR_NFC_INERT, FALSE },     /* Hangul LV */
2135        { 0xac1d, UCHAR_NFC_INERT, TRUE },      /* Hangul LVT */
2136
2137        { 0x1d79d, UCHAR_NFKC_INERT, FALSE },   /* math compat version of xi */
2138        { 0x2a6d6, UCHAR_NFKC_INERT, TRUE },    /* Han, last of CJK ext. B */
2139
2140        { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE },
2141        { 0x0308, UCHAR_SEGMENT_STARTER, FALSE },
2142        { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */
2143        { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */
2144        { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */
2145        { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */
2146#endif
2147
2148        { 0x0044, UCHAR_HEX_DIGIT, TRUE },
2149        { 0xff46, UCHAR_HEX_DIGIT, TRUE },
2150        { 0x0047, UCHAR_HEX_DIGIT, FALSE },
2151
2152        { 0x30fb, UCHAR_HYPHEN, TRUE },
2153        { 0xfe58, UCHAR_HYPHEN, FALSE },
2154
2155        { 0x2172, UCHAR_ID_CONTINUE, TRUE },
2156        { 0x0307, UCHAR_ID_CONTINUE, TRUE },
2157        { 0x005c, UCHAR_ID_CONTINUE, FALSE },
2158
2159        { 0x2172, UCHAR_ID_START, TRUE },
2160        { 0x007a, UCHAR_ID_START, TRUE },
2161        { 0x0039, UCHAR_ID_START, FALSE },
2162
2163        { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE },
2164        { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE },
2165        { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE },
2166
2167        { 0x200c, UCHAR_JOIN_CONTROL, TRUE },
2168        { 0x2029, UCHAR_JOIN_CONTROL, FALSE },
2169
2170        { 0x1d7bc, UCHAR_LOWERCASE, TRUE },
2171        { 0x0345, UCHAR_LOWERCASE, TRUE },
2172        { 0x0030, UCHAR_LOWERCASE, FALSE },
2173
2174        { 0x1d7a9, UCHAR_MATH, TRUE },
2175        { 0x2135, UCHAR_MATH, TRUE },
2176        { 0x0062, UCHAR_MATH, FALSE },
2177
2178        { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2179        { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE },
2180        { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE },
2181
2182        { 0x0022, UCHAR_QUOTATION_MARK, TRUE },
2183        { 0xff62, UCHAR_QUOTATION_MARK, TRUE },
2184        { 0xd840, UCHAR_QUOTATION_MARK, FALSE },
2185
2186        { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE },
2187        { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE },
2188
2189        { 0x1d44a, UCHAR_UPPERCASE, TRUE },
2190        { 0x2162, UCHAR_UPPERCASE, TRUE },
2191        { 0x0345, UCHAR_UPPERCASE, FALSE },
2192
2193        { 0x0020, UCHAR_WHITE_SPACE, TRUE },
2194        { 0x202f, UCHAR_WHITE_SPACE, TRUE },
2195        { 0x3001, UCHAR_WHITE_SPACE, FALSE },
2196
2197        { 0x0711, UCHAR_XID_CONTINUE, TRUE },
2198        { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE },
2199        { 0x007c, UCHAR_XID_CONTINUE, FALSE },
2200
2201        { 0x16ee, UCHAR_XID_START, TRUE },
2202        { 0x23456, UCHAR_XID_START, TRUE },
2203        { 0x1d1aa, UCHAR_XID_START, FALSE },
2204
2205        /*
2206         * Version break:
2207         * The following properties are only supported starting with the
2208         * Unicode version indicated in the second field.
2209         */
2210        { -1, 0x320, 0 },
2211
2212        { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2213        { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE },
2214        { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE },
2215
2216        { 0x0341, UCHAR_DEPRECATED, TRUE },
2217        { 0xe0041, UCHAR_DEPRECATED, TRUE },        /* changed from Unicode 5 to 5.1 */
2218        { 0xe0100, UCHAR_DEPRECATED, FALSE },
2219
2220        { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE },
2221        { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE },
2222        { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE },
2223        { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE },     /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2224
2225        { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE },
2226        { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE },
2227        { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE },    /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */
2228        { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE },
2229
2230        { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE },
2231        { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE },
2232
2233        { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE },
2234        { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE },
2235
2236        { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE },
2237        { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE },
2238
2239        { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE },
2240        { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE },
2241
2242        { 0x2e9b, UCHAR_RADICAL, TRUE },
2243        { 0x4e00, UCHAR_RADICAL, FALSE },
2244
2245        { 0x012f, UCHAR_SOFT_DOTTED, TRUE },
2246        { 0x0049, UCHAR_SOFT_DOTTED, FALSE },
2247
2248        { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE },
2249        { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE },
2250
2251        { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */
2252
2253        { 0x002e, UCHAR_S_TERM, TRUE },
2254        { 0x0061, UCHAR_S_TERM, FALSE },
2255
2256        { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE },
2257        { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE },
2258        { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE },
2259        { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE },
2260
2261        /* enum/integer type properties */
2262
2263        /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */
2264        /* test default Bidi classes for unassigned code points */
2265        { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2266        { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2267        { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2268        { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */
2269        { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */
2270        { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2271        { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2272        { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2273        { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2274        { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2275        { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT },
2276
2277        { 0x0605, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2278        { 0x061c, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2279        { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2280        { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2281        { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2282        { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2283        { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2284        { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC },
2285
2286        { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS },
2287        { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU },
2288        { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS },
2289        { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG },
2290        { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU },
2291        { 0x1AFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2292        { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA },
2293        { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS },
2294        { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2295        { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK },
2296        { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B },
2297
2298        /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */
2299        { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 },
2300
2301        { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK },
2302        { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT },
2303        { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2304        { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2305        { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2306        { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL },
2307        { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL },
2308        { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT },
2309        { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE },
2310
2311        { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2312        { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW },
2313        { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2314        { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH },
2315        { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2316        { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH },
2317        { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2318        { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2319        { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2320        { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2321        { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2322        { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2323        { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE },
2324        { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */
2325        { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL },
2326        { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2327        { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS },
2328
2329        /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */
2330        { 0xd7d7, UCHAR_GENERAL_CATEGORY, 0 },
2331
2332        { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP },
2333        { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN },
2334        { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH },
2335        { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH },
2336        { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL },
2337        { 0x06C3, UCHAR_JOINING_GROUP, U_JG_HAMZA_ON_HEH_GOAL },
2338
2339        { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING },
2340        { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2341        { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING },
2342        { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING },
2343        { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING },
2344        { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2345        { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2346        { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT },
2347
2348        /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */
2349        { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2350        { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN },
2351        { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION },
2352        { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION },
2353        { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2354        { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2355        { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2356        { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC },
2357        { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2358        { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2359        { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE },
2360        { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION },
2361        { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS },
2362        { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2363        { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC },
2364
2365        /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */
2366
2367        /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */
2368
2369        { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2370        { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2371        { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2372        { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO },
2373
2374        { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2375        { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2376        { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2377        { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO },
2378
2379        { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2380        { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2381        { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2382        { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO },
2383
2384        { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2385        { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2386        { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2387        { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2388        { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2389        { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2390
2391        { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2392        { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2393        { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2394        { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE },
2395
2396        { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2397        { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2398        { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2399        { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2400        { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE },
2401
2402        { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 },
2403
2404        { -1, 0x410, 0 }, /* version break for Unicode 4.1 */
2405
2406        { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE },
2407        { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE },
2408        { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE },
2409
2410        { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2411        { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2412        { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE },
2413        { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2414        { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE },
2415
2416        { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION },
2417        { 0x2c8e,  UCHAR_BLOCK, UBLOCK_COPTIC },
2418        { 0xfe17,  UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS },
2419
2420        { 0x1a00,  UCHAR_SCRIPT, USCRIPT_BUGINESE },
2421        { 0x2cea,  UCHAR_SCRIPT, USCRIPT_COPTIC },
2422        { 0xa82b,  UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI },
2423        { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN },
2424
2425        { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 },
2426        { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 },
2427        { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 },
2428        { 0x115f, UCHAR_LINE_BREAK, U_LB_JL },
2429        { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT },
2430        { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV },
2431
2432        { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT },
2433        { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND },
2434        { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL },
2435        { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V },
2436
2437        { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER },
2438        { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER },
2439        { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC },
2440        { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM },
2441
2442        { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER },
2443        { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER },
2444        { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE },
2445        { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP },
2446
2447        /* undefined UProperty values */
2448        { 0x61, 0x4a7, 0 },
2449        { 0x234bc, 0x15ed, 0 }
2450    };
2451
2452    UVersionInfo version;
2453    UChar32 c;
2454    int32_t i, result, uVersion;
2455    UProperty which;
2456
2457    /* what is our Unicode version? */
2458    u_getUnicodeVersion(version);
2459    uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */
2460
2461    u_charAge(0x20, version);
2462    if(version[0]==0) {
2463        /* no additional properties available */
2464        log_err("TestAdditionalProperties: no additional properties available, not tested\n");
2465        return;
2466    }
2467
2468    /* test u_charAge() */
2469    for(i=0; i<sizeof(charAges)/sizeof(charAges[0]); ++i) {
2470        u_charAge(charAges[i].c, version);
2471        if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) {
2472            log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n",
2473                charAges[i].c,
2474                version[0], version[1], version[2], version[3],
2475                charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]);
2476        }
2477    }
2478
2479    if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 ||
2480        u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 ||
2481        u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 ||   /* j2478 */
2482        u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/
2483        u_getIntPropertyMinValue(0x2345)!=0
2484    ) {
2485        log_err("error: u_getIntPropertyMinValue() wrong\n");
2486    }
2487    if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) {
2488        log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n");
2489    }
2490    if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) {
2491        log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n");
2492    }
2493    if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) {
2494        log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n");
2495    }
2496    if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ) {
2497        log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n");
2498    }
2499    if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) {
2500        log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n");
2501    }
2502    if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) {
2503        log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n");
2504    }
2505    if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) {
2506        log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n");
2507    }
2508    if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) {
2509        log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n");
2510    }
2511    if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGORY_COUNT-1) {
2512        log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n");
2513    }
2514    if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUNT-1) {
2515        log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wrong\n");
2516    }
2517    if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_COUNT-1) {
2518        log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) wrong\n");
2519    }
2520    if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) {
2521        log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n");
2522    }
2523    if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) {
2524        log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n");
2525    }
2526    /*JB#2410*/
2527    if( u_getIntPropertyMaxValue(0x2345)!=-1) {
2528        log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n");
2529    }
2530    if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1)) {
2531        log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong\n");
2532    }
2533    if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) !=  (int32_t) (U_JG_COUNT -1)) {
2534        log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n");
2535    }
2536    if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1)) {
2537        log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n");
2538    }
2539    if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)) {
2540        log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n");
2541    }
2542
2543    /* test u_hasBinaryProperty() and u_getIntPropertyValue() */
2544    for(i=0; i<sizeof(props)/sizeof(props[0]); ++i) {
2545        if(props[i][0]<0) {
2546            /* Unicode version break */
2547            if(uVersion<props[i][1]) {
2548                break; /* do not test properties that are not yet supported */
2549            } else {
2550                continue; /* skip this row */
2551            }
2552        }
2553
2554        c=(UChar32)props[i][0];
2555        which=(UProperty)props[i][1];
2556
2557        if(which<UCHAR_INT_START) {
2558            result=u_hasBinaryProperty(c, which);
2559            if(result!=props[i][2]) {
2560                log_err("error: u_hasBinaryProperty(U+%04lx, %d)=%d is wrong (props[%d])\n",
2561                        c, which, result, i);
2562            }
2563        }
2564
2565        result=u_getIntPropertyValue(c, which);
2566        if(result!=props[i][2]) {
2567            log_err("error: u_getIntPropertyValue(U+%04lx, 0x1000+%d)=%d is wrong, should be %d (props[%d])\n",
2568                    c, (int32_t)which-0x1000, result, props[i][2], i);
2569        }
2570
2571        /* test separate functions, too */
2572        switch((UProperty)props[i][1]) {
2573        case UCHAR_ALPHABETIC:
2574            if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) {
2575                log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n",
2576                        props[i][0], result, i);
2577            }
2578            break;
2579        case UCHAR_LOWERCASE:
2580            if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2581                log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n",
2582                        props[i][0], result, i);
2583            }
2584            break;
2585        case UCHAR_UPPERCASE:
2586            if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) {
2587                log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n",
2588                        props[i][0], result, i);
2589            }
2590            break;
2591        case UCHAR_WHITE_SPACE:
2592            if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) {
2593                log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n",
2594                        props[i][0], result, i);
2595            }
2596            break;
2597        default:
2598            break;
2599        }
2600    }
2601}
2602
2603static void
2604TestNumericProperties(void) {
2605    /* see UnicodeData.txt, DerivedNumericValues.txt */
2606    static const struct {
2607        UChar32 c;
2608        int32_t type;
2609        double numValue;
2610    } values[]={
2611        { 0x0F33, U_NT_NUMERIC, -1./2. },
2612        { 0x0C66, U_NT_DECIMAL, 0 },
2613        { 0x96f6, U_NT_NUMERIC, 0 },
2614        { 0x2159, U_NT_NUMERIC, 1./6. },
2615        { 0x00BD, U_NT_NUMERIC, 1./2. },
2616        { 0x0031, U_NT_DECIMAL, 1. },
2617        { 0x4e00, U_NT_NUMERIC, 1. },
2618        { 0x58f1, U_NT_NUMERIC, 1. },
2619        { 0x10320, U_NT_NUMERIC, 1. },
2620        { 0x0F2B, U_NT_NUMERIC, 3./2. },
2621        { 0x00B2, U_NT_DIGIT, 2. },
2622        { 0x5f10, U_NT_NUMERIC, 2. },
2623        { 0x1813, U_NT_DECIMAL, 3. },
2624        { 0x5f0e, U_NT_NUMERIC, 3. },
2625        { 0x2173, U_NT_NUMERIC, 4. },
2626        { 0x8086, U_NT_NUMERIC, 4. },
2627        { 0x278E, U_NT_DIGIT, 5. },
2628        { 0x1D7F2, U_NT_DECIMAL, 6. },
2629        { 0x247A, U_NT_DIGIT, 7. },
2630        { 0x7396, U_NT_NUMERIC, 9. },
2631        { 0x1372, U_NT_NUMERIC, 10. },
2632        { 0x216B, U_NT_NUMERIC, 12. },
2633        { 0x16EE, U_NT_NUMERIC, 17. },
2634        { 0x249A, U_NT_NUMERIC, 19. },
2635        { 0x303A, U_NT_NUMERIC, 30. },
2636        { 0x5345, U_NT_NUMERIC, 30. },
2637        { 0x32B2, U_NT_NUMERIC, 37. },
2638        { 0x1375, U_NT_NUMERIC, 40. },
2639        { 0x10323, U_NT_NUMERIC, 50. },
2640        { 0x0BF1, U_NT_NUMERIC, 100. },
2641        { 0x964c, U_NT_NUMERIC, 100. },
2642        { 0x217E, U_NT_NUMERIC, 500. },
2643        { 0x2180, U_NT_NUMERIC, 1000. },
2644        { 0x4edf, U_NT_NUMERIC, 1000. },
2645        { 0x2181, U_NT_NUMERIC, 5000. },
2646        { 0x137C, U_NT_NUMERIC, 10000. },
2647        { 0x4e07, U_NT_NUMERIC, 10000. },
2648        { 0x4ebf, U_NT_NUMERIC, 100000000. },
2649        { 0x5146, U_NT_NUMERIC, 1000000000000. },
2650        { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE },
2651        { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE },
2652        { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE },
2653        { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE },
2654        { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE },
2655        { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE }
2656    };
2657
2658    double nv;
2659    UChar32 c;
2660    int32_t i, type;
2661
2662    for(i=0; i<LENGTHOF(values); ++i) {
2663        c=values[i].c;
2664        type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE);
2665        nv=u_getNumericValue(c);
2666
2667        if(type!=values[i].type) {
2668            log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type);
2669        }
2670        if(0.000001 <= fabs(nv - values[i].numValue)) {
2671            log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue);
2672        }
2673    }
2674}
2675
2676/**
2677 * Test the property names and property value names API.
2678 */
2679static void
2680TestPropertyNames(void) {
2681    int32_t p, v, choice=0, rev;
2682    UBool atLeastSomething = FALSE;
2683
2684    for (p=0; ; ++p) {
2685        UProperty propEnum = (UProperty)p;
2686        UBool sawProp = FALSE;
2687        if(p > 10 && !atLeastSomething) {
2688          log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice);
2689          return;
2690        }
2691
2692        for (choice=0; ; ++choice) {
2693            const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice);
2694            if (name) {
2695                if (!sawProp)
2696                    log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff);
2697                log_verbose("%d=\"%s\"", choice, name);
2698                sawProp = TRUE;
2699                atLeastSomething = TRUE;
2700
2701                /* test reverse mapping */
2702                rev = u_getPropertyEnum(name);
2703                if (rev != p) {
2704                    log_err("Property round-trip failure: %d -> %s -> %d\n",
2705                            p, name, rev);
2706                }
2707            }
2708            if (!name && choice>0) break;
2709        }
2710        if (sawProp) {
2711            /* looks like a valid property; check the values */
2712            const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
2713            int32_t max = 0;
2714            if (p == UCHAR_CANONICAL_COMBINING_CLASS) {
2715                max = 255;
2716            } else if (p == UCHAR_GENERAL_CATEGORY_MASK) {
2717                /* it's far too slow to iterate all the way up to
2718                   the real max, U_GC_P_MASK */
2719                max = U_GC_NL_MASK;
2720            } else if (p == UCHAR_BLOCK) {
2721                /* UBlockCodes, unlike other values, start at 1 */
2722                max = 1;
2723            }
2724            log_verbose("\n");
2725            for (v=-1; ; ++v) {
2726                UBool sawValue = FALSE;
2727                for (choice=0; ; ++choice) {
2728                    const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice);
2729                    if (vname) {
2730                        if (!sawValue) log_verbose(" %s, value %d:", pname, v);
2731                        log_verbose("%d=\"%s\"", choice, vname);
2732                        sawValue = TRUE;
2733
2734                        /* test reverse mapping */
2735                        rev = u_getPropertyValueEnum(propEnum, vname);
2736                        if (rev != v) {
2737                            log_err("Value round-trip failure (%s): %d -> %s -> %d\n",
2738                                    pname, v, vname, rev);
2739                        }
2740                    }
2741                    if (!vname && choice>0) break;
2742                }
2743                if (sawValue) {
2744                    log_verbose("\n");
2745                }
2746                if (!sawValue && v>=max) break;
2747            }
2748        }
2749        if (!sawProp) {
2750            if (p>=UCHAR_STRING_LIMIT) {
2751                break;
2752            } else if (p>=UCHAR_DOUBLE_LIMIT) {
2753                p = UCHAR_STRING_START - 1;
2754            } else if (p>=UCHAR_MASK_LIMIT) {
2755                p = UCHAR_DOUBLE_START - 1;
2756            } else if (p>=UCHAR_INT_LIMIT) {
2757                p = UCHAR_MASK_START - 1;
2758            } else if (p>=UCHAR_BINARY_LIMIT) {
2759                p = UCHAR_INT_START - 1;
2760            }
2761        }
2762    }
2763}
2764
2765/**
2766 * Test the property values API.  See JB#2410.
2767 */
2768static void
2769TestPropertyValues(void) {
2770    int32_t i, p, min, max;
2771    UErrorCode ec;
2772
2773    /* Min should be 0 for everything. */
2774    /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */
2775    for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) {
2776        UProperty propEnum = (UProperty)p;
2777        min = u_getIntPropertyMinValue(propEnum);
2778        if (min != 0) {
2779            if (p == UCHAR_BLOCK) {
2780                /* This is okay...for now.  See JB#2487.
2781                   TODO Update this for JB#2487. */
2782            } else {
2783                const char* name;
2784                name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME);
2785                if (name == NULL)
2786                    name = "<ERROR>";
2787                log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n",
2788                        name, min);
2789            }
2790        }
2791    }
2792
2793    if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 ||
2794        u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) {
2795        log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n");
2796    }
2797
2798    /* Max should be -1 for invalid properties. */
2799    max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE);
2800    if (max != -1) {
2801        log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n",
2802                max);
2803    }
2804
2805    /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */
2806    for (i=0; i<2; ++i) {
2807        int32_t script;
2808        const char* desc;
2809        ec = U_ZERO_ERROR;
2810        switch (i) {
2811        case 0:
2812            script = uscript_getScript(-1, &ec);
2813            desc = "uscript_getScript(-1)";
2814            break;
2815        case 1:
2816            script = u_getIntPropertyValue(-1, UCHAR_SCRIPT);
2817            desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)";
2818            break;
2819        default:
2820            log_err("Internal test error. Too many scripts\n");
2821            return;
2822        }
2823        /* We don't explicitly test ec.  It should be U_FAILURE but it
2824           isn't documented as such. */
2825        if (script != (int32_t)USCRIPT_INVALID_CODE) {
2826            log_err("FAIL: %s = %d, exp. 0\n",
2827                    desc, script);
2828        }
2829    }
2830}
2831
2832/* add characters from a serialized set to a normal one */
2833static void
2834_setAddSerialized(USet *set, const USerializedSet *sset) {
2835    UChar32 start, end;
2836    int32_t i, count;
2837
2838    count=uset_getSerializedRangeCount(sset);
2839    for(i=0; i<count; ++i) {
2840        uset_getSerializedRange(sset, i, &start, &end);
2841        uset_addRange(set, start, end);
2842    }
2843}
2844
2845/* various tests for consistency of UCD data and API behavior */
2846static void
2847TestConsistency() {
2848#if !UCONFIG_NO_NORMALIZATION
2849    UChar buffer16[300];
2850#endif
2851    char buffer[300];
2852    USet *set1, *set2, *set3, *set4;
2853    UErrorCode errorCode;
2854
2855#if !UCONFIG_NO_NORMALIZATION
2856    USerializedSet sset;
2857#endif
2858    UChar32 start, end;
2859    int32_t i, length;
2860
2861    U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10);
2862    U_STRING_DECL(dashPattern, "[:Dash:]", 8);
2863    U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13);
2864    U_STRING_DECL(formatPattern, "[:Cf:]", 6);
2865    U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14);
2866
2867    U_STRING_DECL(mathBlocksPattern,
2868        "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
2869        1+32+46+46+45+43+1+1); /* +1 for NUL */
2870    U_STRING_DECL(mathPattern, "[:Math:]", 8);
2871    U_STRING_DECL(unassignedPattern, "[:Cn:]", 6);
2872    U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14);
2873    U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
2874
2875    U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10);
2876    U_STRING_INIT(dashPattern, "[:Dash:]", 8);
2877    U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13);
2878    U_STRING_INIT(formatPattern, "[:Cf:]", 6);
2879    U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14);
2880
2881    U_STRING_INIT(mathBlocksPattern,
2882        "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]",
2883        1+32+46+46+45+43+1+1); /* +1 for NUL */
2884    U_STRING_INIT(mathPattern, "[:Math:]", 8);
2885    U_STRING_INIT(unassignedPattern, "[:Cn:]", 6);
2886    U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14);
2887    U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20);
2888
2889    /*
2890     * It used to be that UCD.html and its precursors said
2891     * "Those dashes used to mark connections between pieces of words,
2892     *  plus the Katakana middle dot."
2893     *
2894     * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash
2895     * but not from Hyphen.
2896     * UTC 94 (2003mar) decided to leave it that way and to changed UCD.html.
2897     * Therefore, do not show errors when testing the Hyphen property.
2898     */
2899    log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n"
2900                "known to the UTC and not considered errors.\n");
2901
2902    errorCode=U_ZERO_ERROR;
2903    set1=uset_openPattern(hyphenPattern, 10, &errorCode);
2904    set2=uset_openPattern(dashPattern, 8, &errorCode);
2905    if(U_SUCCESS(errorCode)) {
2906        /* remove the Katakana middle dot(s) from set1 */
2907        uset_remove(set1, 0x30fb);
2908        uset_remove(set1, 0xff65); /* halfwidth variant */
2909        showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE);
2910    } else {
2911        log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode));
2912    }
2913
2914    /* check that Cf is neither Hyphen nor Dash nor Alphabetic */
2915    set3=uset_openPattern(formatPattern, 6, &errorCode);
2916    set4=uset_openPattern(alphaPattern, 14, &errorCode);
2917    if(U_SUCCESS(errorCode)) {
2918        showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE);
2919        showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE);
2920        showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE);
2921    } else {
2922        log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missing data?)\n", u_errorName(errorCode));
2923    }
2924
2925    uset_close(set1);
2926    uset_close(set2);
2927    uset_close(set3);
2928    uset_close(set4);
2929
2930    /*
2931     * Check that each lowercase character has "small" in its name
2932     * and not "capital".
2933     * There are some such characters, some of which seem odd.
2934     * Use the verbose flag to see these notices.
2935     */
2936    errorCode=U_ZERO_ERROR;
2937    set1=uset_openPattern(lowerPattern, 13, &errorCode);
2938    if(U_SUCCESS(errorCode)) {
2939        for(i=0;; ++i) {
2940            length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode);
2941            if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2942                break; /* done */
2943            }
2944            if(U_FAILURE(errorCode)) {
2945                log_err("error iterating over [:Lowercase:] at item %d: %s\n",
2946                        i, u_errorName(errorCode));
2947                break;
2948            }
2949            if(length!=0) {
2950                break; /* done with code points, got a string or -1 */
2951            }
2952
2953            while(start<=end) {
2954                length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode);
2955                if(U_FAILURE(errorCode)) {
2956                    log_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode));
2957                    errorCode=U_ZERO_ERROR;
2958                    continue;
2959                }
2960                if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) &&
2961                    strstr(buffer, "SMALL CAPITAL")==NULL
2962                ) {
2963                    log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer);
2964                }
2965                ++start;
2966            }
2967        }
2968    } else {
2969        log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n", u_errorName(errorCode));
2970    }
2971    uset_close(set1);
2972
2973#if !UCONFIG_NO_NORMALIZATION
2974
2975    /*
2976     * Test for an example that unorm_getCanonStartSet() delivers
2977     * all characters that compose from the input one,
2978     * even in multiple steps.
2979     * For example, the set for "I" (0049) should contain both
2980     * I-diaeresis (00CF) and I-diaeresis-acute (1E2E).
2981     * In general, the set for the middle such character should be a subset
2982     * of the set for the first.
2983     */
2984    set1=uset_open(1, 0);
2985    set2=uset_open(1, 0);
2986
2987    if (unorm_getCanonStartSet(0x49, &sset)) {
2988        _setAddSerialized(set1, &sset);
2989
2990        /* enumerate all characters that are plausible to be latin letters */
2991        for(start=0xa0; start<0x2000; ++start) {
2992            if(unorm_getDecomposition(start, FALSE, buffer16, LENGTHOF(buffer16))>1 && buffer16[0]==0x49) {
2993                uset_add(set2, start);
2994            }
2995        }
2996
2997        compareUSets(set1, set2,
2998                     "[canon start set of 0049]", "[all c with canon decomp with 0049]",
2999                     TRUE);
3000    } else {
3001      log_err("error calling unorm_getCanonStartSet()\n");
3002    }
3003
3004    uset_close(set1);
3005    uset_close(set2);
3006
3007#endif
3008
3009    /* verify that all assigned characters in Math blocks are exactly Math characters */
3010    errorCode=U_ZERO_ERROR;
3011    set1=uset_openPattern(mathBlocksPattern, -1, &errorCode);
3012    set2=uset_openPattern(mathPattern, 8, &errorCode);
3013    set3=uset_openPattern(unassignedPattern, 6, &errorCode);
3014    if(U_SUCCESS(errorCode)) {
3015        uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */
3016        uset_complement(set3);      /* assigned characters */
3017        uset_retainAll(set1, set3); /* [math blocks]&[assigned] */
3018        compareUSets(set1, set2,
3019                     "[assigned Math block chars]", "[math blocks]&[:Math:]",
3020                     TRUE);
3021    } else {
3022        log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Are you missing data?)\n", u_errorName(errorCode));
3023    }
3024    uset_close(set1);
3025    uset_close(set2);
3026    uset_close(set3);
3027
3028    /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */
3029    errorCode=U_ZERO_ERROR;
3030    set1=uset_openPattern(unknownPattern, 14, &errorCode);
3031    set2=uset_openPattern(reservedPattern, 20, &errorCode);
3032    if(U_SUCCESS(errorCode)) {
3033        compareUSets(set1, set2,
3034                     "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]",
3035                     TRUE);
3036    } else {
3037        log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode));
3038    }
3039    uset_close(set1);
3040    uset_close(set2);
3041}
3042
/*
 * Starting with ICU4C 3.4, the core Unicode properties files
 * (uprops.icu, ucase.icu, ubidi.icu, unorm.icu)
 * are hardcoded in the common DLL and therefore not included
 * in the data package any more.
 * Tests requiring these files are disabled so that
 * we need not jump through hoops (like adding snapshots of these files
 * to testdata).
 * See Jitterbug 4497.
 */
3053#define HARDCODED_DATA_4497 1
3054
3055/* API coverage for ucase.c */
3056static void TestUCase() {
3057#if !HARDCODED_DATA_4497
3058    UDataMemory *pData;
3059    UCaseProps *csp;
3060    const UCaseProps *ccsp;
3061    UErrorCode errorCode;
3062
3063    /* coverage for ucase_openBinary() */
3064    errorCode=U_ZERO_ERROR;
3065    pData=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &errorCode);
3066    if(U_FAILURE(errorCode)) {
3067        log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
3068                    u_errorName(errorCode));
3069        return;
3070    }
3071
3072    csp=ucase_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
3073    if(U_FAILURE(errorCode)) {
3074        log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n",
3075                u_errorName(errorCode));
3076        udata_close(pData);
3077        return;
3078    }
3079
3080    if(UCASE_LOWER!=ucase_getType(csp, 0xdf)) { /* verify islower(sharp s) */
3081        log_err("ucase_openBinary() does not seem to return working UCaseProps\n");
3082    }
3083
3084    ucase_close(csp);
3085    udata_close(pData);
3086
3087    /* coverage for ucase_getDummy() */
3088    errorCode=U_ZERO_ERROR;
3089    ccsp=ucase_getDummy(&errorCode);
3090    if(ucase_tolower(ccsp, 0x41)!=0x41) {
3091        log_err("ucase_tolower(dummy, A)!=A\n");
3092    }
3093#endif
3094}
3095
3096/* API coverage for ubidi_props.c */
3097static void TestUBiDiProps() {
3098#if !HARDCODED_DATA_4497
3099    UDataMemory *pData;
3100    UBiDiProps *bdp;
3101    const UBiDiProps *cbdp;
3102    UErrorCode errorCode;
3103
3104    /* coverage for ubidi_openBinary() */
3105    errorCode=U_ZERO_ERROR;
3106    pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode);
3107    if(U_FAILURE(errorCode)) {
3108        log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
3109                    u_errorName(errorCode));
3110        return;
3111    }
3112
3113    bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode);
3114    if(U_FAILURE(errorCode)) {
3115        log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n",
3116                u_errorName(errorCode));
3117        udata_close(pData);
3118        return;
3119    }
3120
3121    if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */
3122        log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n");
3123    }
3124
3125    ubidi_closeProps(bdp);
3126    udata_close(pData);
3127
3128    /* coverage for ubidi_getDummy() */
3129    errorCode=U_ZERO_ERROR;
3130    cbdp=ubidi_getDummy(&errorCode);
3131    if(ubidi_getClass(cbdp, 0x20)!=0) {
3132        log_err("ubidi_getClass(dummy, space)!=0\n");
3133    }
3134#endif
3135}
3136
3137/* test case folding, compare return values with CaseFolding.txt ------------ */
3138
/*
 * Bit flags naming the kinds of case folding; combined into a bit set that
 * records which foldings of a character have been tested already.
 */
enum {
    CF_SIMPLE=0x1,
    CF_FULL=0x2,
    CF_TURKIC=0x4,
    /* union of all of the flags above */
    CF_ALL=CF_SIMPLE|CF_FULL|CF_TURKIC
};
3146
3147static void
3148testFold(UChar32 c, int which,
3149         UChar32 simple, UChar32 turkic,
3150         const UChar *full, int32_t fullLength,
3151         const UChar *turkicFull, int32_t turkicFullLength) {
3152    UChar s[2], t[32];
3153    UChar32 c2;
3154    int32_t length, length2;
3155
3156    UErrorCode errorCode=U_ZERO_ERROR;
3157
3158    length=0;
3159    U16_APPEND_UNSAFE(s, length, c);
3160
3161    if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) {
3162        log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3163    }
3164    if((which&CF_FULL)!=0) {
3165        length2=u_strFoldCase(t, LENGTHOF(t), s, length, 0, &errorCode);
3166        if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) {
3167            log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c);
3168        }
3169    }
3170    if((which&CF_TURKIC)!=0) {
3171        if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) {
3172            log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple);
3173        }
3174
3175        length2=u_strFoldCase(t, LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
3176        if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) {
3177            log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c);
3178        }
3179    }
3180}
3181
3182/* test that c case-folds to itself */
3183static void
3184testFoldToSelf(UChar32 c, int which) {
3185    UChar s[2];
3186    int32_t length;
3187
3188    length=0;
3189    U16_APPEND_UNSAFE(s, length, c);
3190    testFold(c, which, c, c, s, length, s, length);
3191}
3192
3193struct CaseFoldingData {
3194    USet *notSeen;
3195    UChar32 prev, prevSimple;
3196    UChar prevFull[32];
3197    int32_t prevFullLength;
3198    int which;
3199};
3200typedef struct CaseFoldingData CaseFoldingData;
3201
3202static void U_CALLCONV
3203caseFoldingLineFn(void *context,
3204                  char *fields[][2], int32_t fieldCount,
3205                  UErrorCode *pErrorCode) {
3206    CaseFoldingData *pData=(CaseFoldingData *)context;
3207    char *end;
3208    UChar full[32];
3209    UChar32 c, prev, simple;
3210    int32_t count;
3211    int which;
3212    char status;
3213
3214    /* get code point */
3215    c=(UChar32)strtoul(u_skipWhitespace(fields[0][0]), &end, 16);
3216    end=(char *)u_skipWhitespace(end);
3217    if(end<=fields[0][0] || end!=fields[0][1]) {
3218        log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]);
3219        *pErrorCode=U_PARSE_ERROR;
3220        return;
3221    }
3222
3223    /* get the status of this mapping */
3224    status=*u_skipWhitespace(fields[1][0]);
3225    if(status!='C' && status!='S' && status!='F' && status!='T') {
3226        log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]);
3227        *pErrorCode=U_PARSE_ERROR;
3228        return;
3229    }
3230
3231    /* get the mapping */
3232    count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode);
3233    if(U_FAILURE(*pErrorCode)) {
3234        log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]);
3235        return;
3236    }
3237
3238    /* there is a simple mapping only if there is exactly one code point (count is in UChars) */
3239    if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) {
3240        simple=c;
3241    }
3242
3243    if(c!=(prev=pData->prev)) {
3244        /*
3245         * Test remaining mappings for the previous code point.
3246         * If a turkic folding was not mentioned, then it should fold the same
3247         * as the regular simple case folding.
3248         */
3249        UChar s[2];
3250        int32_t length;
3251
3252        length=0;
3253        U16_APPEND_UNSAFE(s, length, prev);
3254        testFold(prev, (~pData->which)&CF_ALL,
3255                 prev, pData->prevSimple,
3256                 s, length,
3257                 pData->prevFull, pData->prevFullLength);
3258        pData->prev=pData->prevSimple=c;
3259        length=0;
3260        U16_APPEND_UNSAFE(pData->prevFull, length, c);
3261        pData->prevFullLength=length;
3262        pData->which=0;
3263    }
3264
3265    /*
3266     * Turn the status into a bit set of case foldings to test.
3267     * Remember non-Turkic case foldings as defaults for Turkic mode.
3268     */
3269    switch(status) {
3270    case 'C':
3271        which=CF_SIMPLE|CF_FULL;
3272        pData->prevSimple=simple;
3273        u_memcpy(pData->prevFull, full, count);
3274        pData->prevFullLength=count;
3275        break;
3276    case 'S':
3277        which=CF_SIMPLE;
3278        pData->prevSimple=simple;
3279        break;
3280    case 'F':
3281        which=CF_FULL;
3282        u_memcpy(pData->prevFull, full, count);
3283        pData->prevFullLength=count;
3284        break;
3285    case 'T':
3286        which=CF_TURKIC;
3287        break;
3288    default:
3289        which=0;
3290        break; /* won't happen because of test above */
3291    }
3292
3293    testFold(c, which, simple, simple, full, count, full, count);
3294
3295    /* remember which case foldings of c have been tested */
3296    pData->which|=which;
3297
3298    /* remove c from the set of ones not mentioned in CaseFolding.txt */
3299    uset_remove(pData->notSeen, c);
3300}
3301
3302static void
3303TestCaseFolding() {
3304    CaseFoldingData data={ NULL };
3305    char *fields[3][2];
3306    UErrorCode errorCode;
3307
3308    static char *lastLine= (char *)"10FFFF; C; 10FFFF;";
3309
3310    errorCode=U_ZERO_ERROR;
3311    /* test BMP & plane 1 - nothing interesting above */
3312    data.notSeen=uset_open(0, 0x1ffff);
3313    data.prevFullLength=1; /* length of full case folding of U+0000 */
3314
3315    parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorCode);
3316    if(U_SUCCESS(errorCode)) {
3317        int32_t i, start, end;
3318
3319        /* add a pseudo-last line to finish testing of the actual last one */
3320        fields[0][0]=lastLine;
3321        fields[0][1]=lastLine+6;
3322        fields[1][0]=lastLine+7;
3323        fields[1][1]=lastLine+9;
3324        fields[2][0]=lastLine+10;
3325        fields[2][1]=lastLine+17;
3326        caseFoldingLineFn(&data, fields, 3, &errorCode);
3327
3328        /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */
3329        for(i=0;
3330            0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) &&
3331                U_SUCCESS(errorCode);
3332            ++i
3333        ) {
3334            do {
3335                testFoldToSelf(start, CF_ALL);
3336            } while(++start<=end);
3337        }
3338    }
3339
3340    uset_close(data.notSeen);
3341}
3342