1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2012, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/********************************************************************************
7*
8* File CNORMTST.C
9*
10* Modification History:
11*        Name                     Description
12*     Madhu Katragadda            Ported for C API
13*     synwee                      added test for quick check
14*     synwee                      added test for checkFCD
15*********************************************************************************/
16/*tests for u_normalization*/
17#include "unicode/utypes.h"
18#include "unicode/unorm.h"
19#include "unicode/utf16.h"
20#include "cintltst.h"
21
22#if !UCONFIG_NO_NORMALIZATION
23
24#include <stdlib.h>
25#include <time.h>
26#include "unicode/uchar.h"
27#include "unicode/ustring.h"
28#include "unicode/unorm.h"
29#include "cnormtst.h"
30
/*
 * Number of elements of a real array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that it behaves as a single
 * operand wherever the macro is used.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
32
/* Forward declarations of file-local test functions that addNormTest() registers. */
static void TestAPI(void);
static void TestNormCoverage(void);
static void TestConcatenate(void);
static void TestNextPrevious(void);
static void TestIsNormalized(void);
static void TestFCNFKCClosure(void);
static void TestQuickCheckPerCP(void);
static void TestComposition(void);
static void TestFCD(void);
static void TestGetDecomposition(void);
static void TestGetRawDecomposition(void);
static void TestAppendRestoreMiddle(void);
static void TestGetEasyToUseInstance(void);
67
/*
 * Canonical normalization cases.
 * Each row is { input, expected decomposed form, expected composed form }.
 * The strings carry \uXXXX escapes spelled with a doubled backslash; the tests
 * pass them through CharsToUChars() before use.
 * The last row is three empty strings.
 */
static const char* const canonTests[][3] = {
    { "cat", "cat", "cat" },
    { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" },

    /* D-dot_above */
    { "\\u1e0a", "D\\u0307", "\\u1e0a" },
    /* D dot_above */
    { "D\\u0307", "D\\u0307", "\\u1e0a" },

    /* D-dot_below dot_above */
    { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" },
    /* D-dot_above dot_below */
    { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" },
    /* D dot_above dot_below */
    { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" },

    /* D-cedilla dot_above dot_below */
    { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\u0307" },
    /* D dot_above ogonek dot_below */
    { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" },

    /* E-macron-grave */
    { "\\u1E14", "E\\u0304\\u0300", "\\u1E14" },
    /* E-macron + grave */
    { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" },
    /* E-grave + macron */
    { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" },

    /* angstrom_sign */
    { "\\u212b", "A\\u030a", "\\u00c5" },
    /* A-ring */
    { "\\u00c5", "A\\u030a", "\\u00c5" },

    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n" },

    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" },

    /* ga (Katakana) */
    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },
    /* ka + ten */
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" },
    /* hw_ka + hw_ten */
    { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" },
    /* ka + hw_ten */
    { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" },
    /* hw_ka + ten */
    { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" },
    /* A grave grave_below: the two marks swap places in the decomposed form */
    { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" },

    { "", "", "" }
};
104
/*
 * Compatibility normalization cases.
 * Each row is { input, expected decomposed form, expected composed form }.
 * The last row is three empty strings.
 */
static const char* const compatTests[][3] = {
    { "cat", "cat", "cat" },

    /* Alef-Lamed vs. Alef, Lamed */
    { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" },

    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    /* ffi ligature -> f + f + i */
    { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" },

    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry IV", "Henry IV" },

    /* ga (Katakana) */
    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },
    /* ka + ten */
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" },

    /* hw_ka + ten */
    { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" },

    /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later */
    /* hw_ka + hw_ten */
    { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" },
    /* ka + hw_ten */
    { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" },

    { "", "", "" }
};
127
/*
 * FCD cases: { input, expected output, unused (NULL) }.
 * Added for testing the below-U+0300 prefix of a NUL-terminated string.
 */
static const char* const fcdTests[][3] = {
    /* D-caron + cedilla */
    { "\\u010e\\u0327", "D\\u0327\\u030c", NULL },
    /* D-caron */
    { "\\u010e", "\\u010e", NULL }
};
133
134void addNormTest(TestNode** root);
135
136void addNormTest(TestNode** root)
137{
138    addTest(root, &TestAPI, "tsnorm/cnormtst/TestAPI");
139    addTest(root, &TestDecomp, "tsnorm/cnormtst/TestDecomp");
140    addTest(root, &TestCompatDecomp, "tsnorm/cnormtst/TestCompatDecomp");
141    addTest(root, &TestCanonDecompCompose, "tsnorm/cnormtst/TestCanonDecompCompose");
142    addTest(root, &TestCompatDecompCompose, "tsnorm/cnormtst/TestCompatDecompCompose");
143    addTest(root, &TestFCD, "tsnorm/cnormtst/TestFCD");
144    addTest(root, &TestNull, "tsnorm/cnormtst/TestNull");
145    addTest(root, &TestQuickCheck, "tsnorm/cnormtst/TestQuickCheck");
146    addTest(root, &TestQuickCheckPerCP, "tsnorm/cnormtst/TestQuickCheckPerCP");
147    addTest(root, &TestIsNormalized, "tsnorm/cnormtst/TestIsNormalized");
148    addTest(root, &TestCheckFCD, "tsnorm/cnormtst/TestCheckFCD");
149    addTest(root, &TestNormCoverage, "tsnorm/cnormtst/TestNormCoverage");
150    addTest(root, &TestConcatenate, "tsnorm/cnormtst/TestConcatenate");
151    addTest(root, &TestNextPrevious, "tsnorm/cnormtst/TestNextPrevious");
152    addTest(root, &TestFCNFKCClosure, "tsnorm/cnormtst/TestFCNFKCClosure");
153    addTest(root, &TestComposition, "tsnorm/cnormtst/TestComposition");
154    addTest(root, &TestGetDecomposition, "tsnorm/cnormtst/TestGetDecomposition");
155    addTest(root, &TestGetRawDecomposition, "tsnorm/cnormtst/TestGetRawDecomposition");
156    addTest(root, &TestAppendRestoreMiddle, "tsnorm/cnormtst/TestAppendRestoreMiddle");
157    addTest(root, &TestGetEasyToUseInstance, "tsnorm/cnormtst/TestGetEasyToUseInstance");
158}
159
/*
 * Printable names for log messages, indexed directly by a UNormalizationMode
 * value (see modeStrings[mode] in TestNormCases()).
 * UNormalizationMode starts at UNORM_NONE == 1, so index 0 is a placeholder;
 * without it every logged name would be the name of the following mode.
 */
static const char* const modeStrings[]={
    "?",                /* 0: not a UNormalizationMode value */
    "UNORM_NONE",       /* 1 */
    "UNORM_NFD",        /* 2 */
    "UNORM_NFKD",       /* 3 */
    "UNORM_NFC",        /* 4 */
    "UNORM_NFKC",       /* 5 */
    "UNORM_FCD",        /* 6 */
    "UNORM_MODE_COUNT"  /* 7 */
};
169
170static void TestNormCases(UNormalizationMode mode,
171                          const char* const cases[][3], int32_t lengthOfCases) {
172    int32_t x, neededLen, length2;
173    int32_t expIndex= (mode==UNORM_NFC || mode==UNORM_NFKC) ? 2 : 1;
174    UChar *source=NULL;
175    UChar result[16];
176    log_verbose("Testing unorm_normalize(%s)\n", modeStrings[mode]);
177    for(x=0; x < lengthOfCases; x++)
178    {
179        UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
180        source=CharsToUChars(cases[x][0]);
181        neededLen= unorm_normalize(source, u_strlen(source), mode, 0, NULL, 0, &status);
182        length2= unorm_normalize(source, -1, mode, 0, NULL, 0, &status2);
183        if(neededLen!=length2) {
184          log_err("ERROR in unorm_normalize(%s)[%d]: "
185                  "preflight length/NUL %d!=%d preflight length/srcLength\n",
186                  modeStrings[mode], (int)x, (int)neededLen, (int)length2);
187        }
188        if(status==U_BUFFER_OVERFLOW_ERROR)
189        {
190            status=U_ZERO_ERROR;
191        }
192        length2=unorm_normalize(source, u_strlen(source), mode, 0, result, LENGTHOF(result), &status);
193        if(U_FAILURE(status) || neededLen!=length2) {
194            log_data_err("ERROR in unorm_normalize(%s/NUL) at %s:  %s - (Are you missing data?)\n",
195                         modeStrings[mode], austrdup(source), myErrorName(status));
196        } else {
197            assertEqual(result, cases[x][expIndex], x);
198        }
199        length2=unorm_normalize(source, -1, mode, 0, result, LENGTHOF(result), &status);
200        if(U_FAILURE(status) || neededLen!=length2) {
201            log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s:  %s - (Are you missing data?)\n",
202                         modeStrings[mode], austrdup(source), myErrorName(status));
203        } else {
204            assertEqual(result, cases[x][expIndex], x);
205        }
206        free(source);
207    }
208}
209
210void TestDecomp() {
211    TestNormCases(UNORM_NFD, canonTests, LENGTHOF(canonTests));
212}
213
214void TestCompatDecomp() {
215    TestNormCases(UNORM_NFKD, compatTests, LENGTHOF(compatTests));
216}
217
218void TestCanonDecompCompose() {
219    TestNormCases(UNORM_NFC, canonTests, LENGTHOF(canonTests));
220}
221
222void TestCompatDecompCompose() {
223    TestNormCases(UNORM_NFKC, compatTests, LENGTHOF(compatTests));
224}
225
226void TestFCD() {
227    TestNormCases(UNORM_FCD, fcdTests, LENGTHOF(fcdTests));
228}
229
230static void assertEqual(const UChar* result, const char* expected, int32_t index)
231{
232    UChar *expectedUni = CharsToUChars(expected);
233    if(u_strcmp(result, expectedUni)!=0){
234        log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index, expected,
235            austrdup(result) );
236    }
237    free(expectedUni);
238}
239
240static void TestNull_check(UChar *src, int32_t srcLen,
241                    UChar *exp, int32_t expLen,
242                    UNormalizationMode mode,
243                    const char *name)
244{
245    UErrorCode status = U_ZERO_ERROR;
246    int32_t len, i;
247
248    UChar   result[50];
249
250
251    status = U_ZERO_ERROR;
252
253    for(i=0;i<50;i++)
254      {
255        result[i] = 0xFFFD;
256      }
257
258    len = unorm_normalize(src, srcLen, mode, 0, result, 50, &status);
259
260    if(U_FAILURE(status)) {
261      log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name, u_errorName(status));
262    } else if (len != expLen) {
263      log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name, expLen, len);
264    }
265
266    {
267      for(i=0;i<len;i++){
268        if(exp[i] != result[i]) {
269          log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
270                  name,
271                  i,
272                  exp[i],
273                  result[i]);
274          return;
275        }
276        log_verbose("     %d: \\u%04X\n", i, result[i]);
277      }
278    }
279
280    log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name);
281}
282
283void TestNull()
284{
285
286    UChar   source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
287    int32_t source_comp_len = 4;
288    UChar   expect_comp[] = { 0x0061, 0x0000, 0x1e0a };
289    int32_t expect_comp_len = 3;
290
291    UChar   source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 };
292    int32_t source_dcmp_len = 3;
293    UChar   expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
294    int32_t expect_dcmp_len = 5;
295
296    TestNull_check(source_comp,
297                   source_comp_len,
298                   expect_comp,
299                   expect_comp_len,
300                   UNORM_NFC,
301                   "UNORM_NFC");
302
303    TestNull_check(source_dcmp,
304                   source_dcmp_len,
305                   expect_dcmp,
306                   expect_dcmp_len,
307                   UNORM_NFD,
308                   "UNORM_NFD");
309
310    TestNull_check(source_comp,
311                   source_comp_len,
312                   expect_comp,
313                   expect_comp_len,
314                   UNORM_NFKC,
315                   "UNORM_NFKC");
316
317
318}
319
320static void TestQuickCheckResultNO()
321{
322  const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
323                         0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
324  const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
325                          0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
326  const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
327                           0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
328  const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
329                           0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
330
331
332  const int SIZE = 10;
333
334  int count = 0;
335  UErrorCode error = U_ZERO_ERROR;
336
337  for (; count < SIZE; count ++)
338  {
339    if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
340                                                              UNORM_NO)
341    {
342      log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
343      return;
344    }
345    if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
346                                                              UNORM_NO)
347    {
348      log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
349      return;
350    }
351    if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
352                                                              UNORM_NO)
353    {
354      log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
355      return;
356    }
357    if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
358                                                              UNORM_NO)
359    {
360      log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
361      return;
362    }
363  }
364}
365
366
367static void TestQuickCheckResultYES()
368{
369  const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
370                         0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
371  const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
372                         0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
373  const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
374                          0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
375  const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
376                          0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
377
378  const int SIZE = 10;
379  int count = 0;
380  UErrorCode error = U_ZERO_ERROR;
381
382  UChar cp = 0;
383  while (cp < 0xA0)
384  {
385    if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES)
386    {
387      log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp);
388      return;
389    }
390    if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) !=
391                                                             UNORM_YES)
392    {
393      log_err("ERROR in NFC quick check at U+%04x\n", cp);
394      return;
395    }
396    if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES)
397    {
398      log_err("ERROR in NFKD quick check at U+%04x\n", cp);
399      return;
400    }
401    if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) !=
402                                                             UNORM_YES)
403    {
404      log_err("ERROR in NFKC quick check at U+%04x\n", cp);
405      return;
406    }
407    cp ++;
408  }
409
410  for (; count < SIZE; count ++)
411  {
412    if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
413                                                             UNORM_YES)
414    {
415      log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
416      return;
417    }
418    if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error)
419                                                          != UNORM_YES)
420    {
421      log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
422      return;
423    }
424    if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
425                                                             UNORM_YES)
426    {
427      log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
428      return;
429    }
430    if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
431                                                             UNORM_YES)
432    {
433      log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
434      return;
435    }
436  }
437}
438
439static void TestQuickCheckResultMAYBE()
440{
441  const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
442                         0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
443  const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
444                          0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
445
446
447  const int SIZE = 10;
448
449  int count = 0;
450  UErrorCode error = U_ZERO_ERROR;
451
452  /* NFD and NFKD does not have any MAYBE codepoints */
453  for (; count < SIZE; count ++)
454  {
455    if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
456                                                           UNORM_MAYBE)
457    {
458      log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC[count]);
459      return;
460    }
461    if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
462                                                           UNORM_MAYBE)
463    {
464      log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
465      return;
466    }
467  }
468}
469
470static void TestQuickCheckStringResult()
471{
472  int count;
473  UChar *d = NULL;
474  UChar *c = NULL;
475  UErrorCode error = U_ZERO_ERROR;
476
477  for (count = 0; count < LENGTHOF(canonTests); count ++)
478  {
479    d = CharsToUChars(canonTests[count][1]);
480    c = CharsToUChars(canonTests[count][2]);
481    if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) !=
482                                                            UNORM_YES)
483    {
484      log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count);
485      return;
486    }
487
488    if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) ==
489                                                            UNORM_NO)
490    {
491      log_err("ERROR in NFC quick check for string at count %d\n", count);
492      return;
493    }
494
495    free(d);
496    free(c);
497  }
498
499  for (count = 0; count < LENGTHOF(compatTests); count ++)
500  {
501    d = CharsToUChars(compatTests[count][1]);
502    c = CharsToUChars(compatTests[count][2]);
503    if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) !=
504                                                            UNORM_YES)
505    {
506      log_err("ERROR in NFKD quick check for string at count %d\n", count);
507      return;
508    }
509
510    if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) !=
511                                                            UNORM_YES)
512    {
513      log_err("ERROR in NFKC quick check for string at count %d\n", count);
514      return;
515    }
516
517    free(d);
518    free(c);
519  }
520}
521
/*
 * Entry point for the unorm_quickCheck() tests: runs the NO, YES and MAYBE
 * single-code-unit checks, then the whole-string checks.
 */
void TestQuickCheck()
{
  /* single code units, one helper per expected answer */
  TestQuickCheckResultNO();
  TestQuickCheckResultYES();
  TestQuickCheckResultMAYBE();

  /* whole strings from the case tables */
  TestQuickCheckStringResult();
}
529
530/*
531 * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
532 * normalized, and some that are not.
533 * Here we pick some specific cases and test the C API.
534 */
535static void TestIsNormalized(void) {
536    static const UChar notNFC[][8]={            /* strings that are not in NFC */
537        { 0x62, 0x61, 0x300, 0x63, 0 },         /* 0061 0300 compose */
538        { 0xfb1d, 0 },                          /* excluded from composition */
539        { 0x0627, 0x0653, 0 },                  /* 0627 0653 compose */
540        { 0x3071, 0x306f, 0x309a, 0x3073, 0 }   /* 306F 309A compose */
541    };
542    static const UChar notNFKC[][8]={           /* strings that are not in NFKC */
543        { 0x1100, 0x1161, 0 },                  /* Jamo compose */
544        { 0x1100, 0x314f, 0 },                  /* compatibility Jamo compose */
545        { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 }   /* 1F00 0345 compose */
546    };
547
548    int32_t i;
549    UErrorCode errorCode;
550
551    /* API test */
552
553    /* normal case with length>=0 (length -1 used for special cases below) */
554    errorCode=U_ZERO_ERROR;
555    if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
556        log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode));
557    }
558
559    /* incoming U_FAILURE */
560    errorCode=U_TRUNCATED_CHAR_FOUND;
561    (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode);
562    if(errorCode!=U_TRUNCATED_CHAR_FOUND) {
563        log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode));
564    }
565
566    /* NULL source */
567    errorCode=U_ZERO_ERROR;
568    (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode);
569    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
570        log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
571    }
572
573    /* bad length */
574    errorCode=U_ZERO_ERROR;
575    (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode);
576    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
577        log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
578    }
579
580    /* specific cases */
581    for(i=0; i<LENGTHOF(notNFC); ++i) {
582        errorCode=U_ZERO_ERROR;
583        if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
584            log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
585        }
586        errorCode=U_ZERO_ERROR;
587        if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
588            log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
589        }
590    }
591    for(i=0; i<LENGTHOF(notNFKC); ++i) {
592        errorCode=U_ZERO_ERROR;
593        if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
594            log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
595        }
596    }
597}
598
599void TestCheckFCD()
600{
601  UErrorCode status = U_ZERO_ERROR;
602  static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
603                         0x0A};
604  static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
605                          0x02B9, 0x0314, 0x0315, 0x0316};
606  static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
607                         0x0050, 0x0730, 0x09EE, 0x1E10};
608
609  static const UChar datastr[][5] =
610  { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
611    {0x0061, 0x030A, 0x00E2, 0x0323, 0},
612    {0x0061, 0x0323, 0x00E2, 0x0323, 0},
613    {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
614  static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES};
615
616  static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
617                            0x6a,
618                            0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
619                            0xea,
620                            0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
621                            0x0307, 0x0308, 0x0309, 0x030a,
622                            0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
623                            0x0327, 0x0328, 0x0329, 0x032a,
624                            0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
625                            0x1e07, 0x1e08, 0x1e09, 0x1e0a};
626
627  int count = 0;
628
629  if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES)
630    log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
631  if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO)
632    log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
633  if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES)
634    log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
635
636  if (U_FAILURE(status))
637    log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status));
638
639  while (count < 4)
640  {
641    UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status);
642    if (U_FAILURE(status)) {
643      log_data_err("unorm_quickCheck(FCD) failed: exception occured at data set %d - (Are you missing data?)\n", count);
644      break;
645    }
646    else {
647      if (result[count] != fcdresult) {
648        log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count,
649                 result[count]);
650      }
651    }
652    count ++;
653  }
654
655  /* random checks of long strings */
656  status = U_ZERO_ERROR;
657  srand((unsigned)time( NULL ));
658
659  for (count = 0; count < 50; count ++)
660  {
661    int size = 0;
662    UBool testresult = UNORM_YES;
663    UChar data[20];
664    UChar norm[100];
665    UChar nfd[100];
666    int normsize = 0;
667    int nfdsize = 0;
668
669    while (size != 19) {
670      data[size] = datachar[(rand() * 50) / RAND_MAX];
671      log_verbose("0x%x", data[size]);
672      normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0,
673                                  norm + normsize, 100 - normsize, &status);
674      if (U_FAILURE(status)) {
675        log_data_err("unorm_quickCheck(FCD) failed: exception occured at data generation - (Are you missing data?)\n");
676        break;
677      }
678      size ++;
679    }
680    log_verbose("\n");
681
682    nfdsize = unorm_normalize(data, size, UNORM_NFD, 0,
683                              nfd, 100, &status);
684    if (U_FAILURE(status)) {
685      log_data_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation - (Are you missing data?)\n");
686    }
687
688    if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) {
689      testresult = UNORM_NO;
690    }
691    if (testresult == UNORM_YES) {
692      log_verbose("result UNORM_YES\n");
693    }
694    else {
695      log_verbose("result UNORM_NO\n");
696    }
697
698    if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAILURE(status)) {
699      log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult);
700    }
701  }
702}
703
704static void
705TestAPI() {
706    static const UChar in[]={ 0x68, 0xe4 };
707    UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
708    UErrorCode errorCode;
709    int32_t length;
710
711    /* try preflighting */
712    errorCode=U_ZERO_ERROR;
713    length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode);
714    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
715        log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
716        return;
717    }
718
719    errorCode=U_ZERO_ERROR;
720    length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode);
721    if(U_FAILURE(errorCode)) {
722        log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName(errorCode));
723        return;
724    }
725    if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) {
726        log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]);
727        return;
728    }
729    length=unorm_normalize(NULL, 0, UNORM_NFC, 0, NULL, 0, &errorCode);
730    if(U_FAILURE(errorCode)) {
731        log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
732        return;
733    }
734    length=unorm_normalize(NULL, 0, UNORM_NFC, 0, out, 20, &errorCode);
735    if(U_FAILURE(errorCode)) {
736        log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
737        return;
738    }
739}
740
741/* test cases to improve test code coverage */
/* Code points used to build the coverage test input. */
enum {
    /* Hangul compatibility Jamo */
    HANGUL_K_KIYEOK      = 0x3131,  /* NFKD->Jamo L U+1100 */
    HANGUL_K_WEO         = 0x315d,  /* NFKD->Jamo V U+116f */
    HANGUL_K_KIYEOK_SIOS = 0x3133,  /* NFKD->Jamo T U+11aa */

    /* conjoining Jamo */
    HANGUL_KIYEOK        = 0x1100,  /* Jamo L U+1100 */
    HANGUL_WEO           = 0x116f,  /* Jamo V U+116f */
    HANGUL_KIYEOK_SIOS   = 0x11aa,  /* Jamo T U+11aa */

    /* precomposed Hangul syllables */
    HANGUL_AC00          = 0xac00,  /* Hangul syllable = Jamo LV U+ac00 */
    HANGUL_SYLLABLE      = 0xac00+14*28+3,  /* Hangul syllable = U+1100 * U+116f * U+11aa */

    /* supplementary-plane musical symbols */
    MUSICAL_VOID_NOTEHEAD = 0x1d157,
    MUSICAL_HALF_NOTE     = 0x1d15e,  /* NFC/NFD->Notehead+Stem */
    MUSICAL_STEM          = 0x1d165,  /* cc=216 */
    MUSICAL_STACCATO      = 0x1d17c   /* cc=220 */
};
759
760static void
761TestNormCoverage() {
762    UChar input[1000], expect[1000], output[1000];
763    UErrorCode errorCode;
764    int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLength;
765
766    /* create a long and nasty string with NFKC-unsafe characters */
767    inLength=0;
768
769    /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
770    input[inLength++]=HANGUL_KIYEOK;
771    input[inLength++]=HANGUL_WEO;
772    input[inLength++]=HANGUL_KIYEOK_SIOS;
773
774    input[inLength++]=HANGUL_KIYEOK;
775    input[inLength++]=HANGUL_WEO;
776    input[inLength++]=HANGUL_K_KIYEOK_SIOS;
777
778    input[inLength++]=HANGUL_KIYEOK;
779    input[inLength++]=HANGUL_K_WEO;
780    input[inLength++]=HANGUL_KIYEOK_SIOS;
781
782    input[inLength++]=HANGUL_KIYEOK;
783    input[inLength++]=HANGUL_K_WEO;
784    input[inLength++]=HANGUL_K_KIYEOK_SIOS;
785
786    input[inLength++]=HANGUL_K_KIYEOK;
787    input[inLength++]=HANGUL_WEO;
788    input[inLength++]=HANGUL_KIYEOK_SIOS;
789
790    input[inLength++]=HANGUL_K_KIYEOK;
791    input[inLength++]=HANGUL_WEO;
792    input[inLength++]=HANGUL_K_KIYEOK_SIOS;
793
794    input[inLength++]=HANGUL_K_KIYEOK;
795    input[inLength++]=HANGUL_K_WEO;
796    input[inLength++]=HANGUL_KIYEOK_SIOS;
797
798    input[inLength++]=HANGUL_K_KIYEOK;
799    input[inLength++]=HANGUL_K_WEO;
800    input[inLength++]=HANGUL_K_KIYEOK_SIOS;
801
802    /* Hangul LV with normal/compatibility Jamo T */
803    input[inLength++]=HANGUL_AC00;
804    input[inLength++]=HANGUL_KIYEOK_SIOS;
805
806    input[inLength++]=HANGUL_AC00;
807    input[inLength++]=HANGUL_K_KIYEOK_SIOS;
808
809    /* compatibility Jamo L, V */
810    input[inLength++]=HANGUL_K_KIYEOK;
811    input[inLength++]=HANGUL_K_WEO;
812
813    hangulPrefixLength=inLength;
814
815    input[inLength++]=U16_LEAD(MUSICAL_HALF_NOTE);
816    input[inLength++]=U16_TRAIL(MUSICAL_HALF_NOTE);
817    for(i=0; i<200; ++i) {
818        input[inLength++]=U16_LEAD(MUSICAL_STACCATO);
819        input[inLength++]=U16_TRAIL(MUSICAL_STACCATO);
820        input[inLength++]=U16_LEAD(MUSICAL_STEM);
821        input[inLength++]=U16_TRAIL(MUSICAL_STEM);
822    }
823
824    /* (compatibility) Jamo L, T do not compose */
825    input[inLength++]=HANGUL_K_KIYEOK;
826    input[inLength++]=HANGUL_K_KIYEOK_SIOS;
827
828    /* quick checks */
829    errorCode=U_ZERO_ERROR;
830    if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_FAILURE(errorCode)) {
831        log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
832    }
833    errorCode=U_ZERO_ERROR;
834    if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_FAILURE(errorCode)) {
835        log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
836    }
837    errorCode=U_ZERO_ERROR;
838    if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
839        log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
840    }
841    errorCode=U_ZERO_ERROR;
842    if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
843        log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
844    }
845    errorCode=U_ZERO_ERROR;
846    if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_FAILURE(errorCode)) {
847        log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
848    }
849
850    /* NFKC */
851    expectLength=0;
852    expect[expectLength++]=HANGUL_SYLLABLE;
853
854    expect[expectLength++]=HANGUL_SYLLABLE;
855
856    expect[expectLength++]=HANGUL_SYLLABLE;
857
858    expect[expectLength++]=HANGUL_SYLLABLE;
859
860    expect[expectLength++]=HANGUL_SYLLABLE;
861
862    expect[expectLength++]=HANGUL_SYLLABLE;
863
864    expect[expectLength++]=HANGUL_SYLLABLE;
865
866    expect[expectLength++]=HANGUL_SYLLABLE;
867
868    expect[expectLength++]=HANGUL_AC00+3;
869
870    expect[expectLength++]=HANGUL_AC00+3;
871
872    expect[expectLength++]=HANGUL_AC00+14*28;
873
874    expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
875    expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
876    expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
877    expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
878    for(i=0; i<200; ++i) {
879        expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
880        expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
881    }
882    for(i=0; i<200; ++i) {
883        expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
884        expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
885    }
886
887    expect[expectLength++]=HANGUL_KIYEOK;
888    expect[expectLength++]=HANGUL_KIYEOK_SIOS;
889
890    /* try destination overflow first */
891    errorCode=U_ZERO_ERROR;
892    preflightLength=unorm_normalize(input, inLength,
893                           UNORM_NFKC, 0,
894                           output, 100, /* too short */
895                           &errorCode);
896    if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
897        log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode));
898    }
899
900    /* real NFKC */
901    errorCode=U_ZERO_ERROR;
902    length=unorm_normalize(input, inLength,
903                           UNORM_NFKC, 0,
904                           output, sizeof(output)/U_SIZEOF_UCHAR,
905                           &errorCode);
906    if(U_FAILURE(errorCode)) {
907        log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
908    } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
909        log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
910        for(i=0; i<length; ++i) {
911            if(output[i]!=expect[i]) {
912                log_err("    NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
913                break;
914            }
915        }
916    }
917    if(length!=preflightLength) {
918        log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length, preflightLength);
919    }
920
921    /* FCD */
922    u_memcpy(expect, input, hangulPrefixLength);
923    expectLength=hangulPrefixLength;
924
925    expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
926    expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
927    expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
928    expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
929    for(i=0; i<200; ++i) {
930        expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
931        expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
932    }
933    for(i=0; i<200; ++i) {
934        expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
935        expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
936    }
937
938    expect[expectLength++]=HANGUL_K_KIYEOK;
939    expect[expectLength++]=HANGUL_K_KIYEOK_SIOS;
940
941    errorCode=U_ZERO_ERROR;
942    length=unorm_normalize(input, inLength,
943                           UNORM_FCD, 0,
944                           output, sizeof(output)/U_SIZEOF_UCHAR,
945                           &errorCode);
946    if(U_FAILURE(errorCode)) {
947        log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
948    } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
949        log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
950        for(i=0; i<length; ++i) {
951            if(output[i]!=expect[i]) {
952                log_err("    FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
953                break;
954            }
955        }
956    }
957}
958
959/* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
960static void
961TestConcatenate(void) {
962    /* "re + 'sume'" */
963    static const UChar
964    left[]={
965        0x72, 0x65, 0
966    },
967    right[]={
968        0x301, 0x73, 0x75, 0x6d, 0xe9, 0
969    },
970    expect[]={
971        0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
972    };
973
974    UChar buffer[100];
975    UErrorCode errorCode;
976    int32_t length;
977
978    /* left with length, right NUL-terminated */
979    errorCode=U_ZERO_ERROR;
980    length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
981    if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length)) {
982        log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
983    }
984
985    /* preflighting */
986    errorCode=U_ZERO_ERROR;
987    length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCode);
988    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) {
989        log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
990    }
991
992    buffer[2]=0x5555;
993    errorCode=U_ZERO_ERROR;
994    length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &errorCode);
995    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) {
996        log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
997    }
998
999    /* enter with U_FAILURE */
1000    buffer[2]=0xaaaa;
1001    errorCode=U_UNEXPECTED_TOKEN;
1002    length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1003    if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) {
1004        log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length, u_errorName(errorCode));
1005    }
1006
1007    /* illegal arguments */
1008    buffer[2]=0xaaaa;
1009    errorCode=U_ZERO_ERROR;
1010    length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1011    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) {
1012        log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1013    }
1014
1015    errorCode=U_ZERO_ERROR;
1016    length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &errorCode);
1017    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1018        log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1019    }
1020}
1021
/* U+002B: separates the expected output pieces in the iteration test strings */
enum { _PLUS = 0x2b };
1025
1026static const char *const _modeString[UNORM_MODE_COUNT]={
1027    "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
1028};
1029
1030static void
1031_testIter(const UChar *src, int32_t srcLength,
1032          UCharIterator *iter, UNormalizationMode mode, UBool forward,
1033          const UChar *out, int32_t outLength,
1034          const int32_t *srcIndexes, int32_t srcIndexesLength) {
1035    UChar buffer[4];
1036    const UChar *expect, *outLimit, *in;
1037    int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength;
1038    UErrorCode errorCode;
1039    UBool neededToNormalize, expectNeeded;
1040
1041    errorCode=U_ZERO_ERROR;
1042    outLimit=out+outLength;
1043    if(forward) {
1044        expect=out;
1045        i=index=0;
1046    } else {
1047        expect=outLimit;
1048        i=srcIndexesLength-2;
1049        index=srcLength;
1050    }
1051
1052    for(;;) {
1053        prevIndex=index;
1054        if(forward) {
1055            if(!iter->hasNext(iter)) {
1056                return;
1057            }
1058            length=unorm_next(iter,
1059                              buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1060                              mode, 0,
1061                              (UBool)(out!=NULL), &neededToNormalize,
1062                              &errorCode);
1063            expectIndex=srcIndexes[i+1];
1064            in=src+prevIndex;
1065            inLength=expectIndex-prevIndex;
1066
1067            if(out!=NULL) {
1068                /* get output piece from between plus signs */
1069                expectLength=0;
1070                while((expect+expectLength)!=outLimit && expect[expectLength]!=_PLUS) {
1071                    ++expectLength;
1072                }
1073                expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1074            } else {
1075                expect=in;
1076                expectLength=inLength;
1077                expectNeeded=FALSE;
1078            }
1079        } else {
1080            if(!iter->hasPrevious(iter)) {
1081                return;
1082            }
1083            length=unorm_previous(iter,
1084                                  buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1085                                  mode, 0,
1086                                  (UBool)(out!=NULL), &neededToNormalize,
1087                                  &errorCode);
1088            expectIndex=srcIndexes[i];
1089            in=src+expectIndex;
1090            inLength=prevIndex-expectIndex;
1091
1092            if(out!=NULL) {
1093                /* get output piece from between plus signs */
1094                expectLength=0;
1095                while(expect!=out && expect[-1]!=_PLUS) {
1096                    ++expectLength;
1097                    --expect;
1098                }
1099                expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1100            } else {
1101                expect=in;
1102                expectLength=inLength;
1103                expectNeeded=FALSE;
1104            }
1105        }
1106        index=iter->getIndex(iter, UITER_CURRENT);
1107
1108        if(U_FAILURE(errorCode)) {
1109            log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n",
1110                    forward, _modeString[mode], i, u_errorName(errorCode));
1111            return;
1112        }
1113        if(expectIndex!=index) {
1114            log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
1115                    forward, _modeString[mode], i, index, expectIndex);
1116            return;
1117        }
1118        if(expectLength!=length) {
1119            log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
1120                    forward, _modeString[mode], i, length, expectLength);
1121            return;
1122        }
1123        if(0!=u_memcmp(expect, buffer, length)) {
1124            log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
1125                    forward, _modeString[mode], i);
1126            return;
1127        }
1128        if(neededToNormalize!=expectNeeded) {
1129        }
1130
1131        if(forward) {
1132            expect+=expectLength+1; /* go after the + */
1133            ++i;
1134        } else {
1135            --expect; /* go before the + */
1136            --i;
1137        }
1138    }
1139}
1140
1141static void
1142TestNextPrevious() {
1143    static const UChar
1144    src[]={ /* input string */
1145        0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1146    },
1147    nfd[]={ /* + separates expected output pieces */
1148        0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x3133
1149    },
1150    nfkd[]={
1151        0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x11aa
1152    },
1153    nfc[]={
1154        0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1155    },
1156    nfkc[]={
1157        0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03
1158    },
1159    fcd[]={
1160        0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1161    };
1162
1163    /* expected iterator indexes in the source string for each iteration piece */
1164    static const int32_t
1165    nfdIndexes[]={
1166        0, 1, 2, 5, 6, 7
1167    },
1168    nfkdIndexes[]={
1169        0, 1, 2, 5, 6, 7
1170    },
1171    nfcIndexes[]={
1172        0, 1, 2, 5, 6, 7
1173    },
1174    nfkcIndexes[]={
1175        0, 1, 2, 5, 7
1176    },
1177    fcdIndexes[]={
1178        0, 1, 2, 5, 6, 7
1179    };
1180
1181    UCharIterator iter;
1182
1183    UChar buffer[4];
1184    int32_t length;
1185
1186    UBool neededToNormalize;
1187    UErrorCode errorCode;
1188
1189    uiter_setString(&iter, src, sizeof(src)/U_SIZEOF_UCHAR);
1190
1191    /* test iteration with doNormalize */
1192    iter.index=0;
1193    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, nfd, sizeof(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4);
1194    iter.index=0;
1195    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, nfkd, sizeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4);
1196    iter.index=0;
1197    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, nfc, sizeof(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4);
1198    iter.index=0;
1199    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, nfkc, sizeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4);
1200    iter.index=0;
1201    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, fcd, sizeof(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4);
1202
1203    iter.index=iter.length;
1204    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, nfd, sizeof(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4);
1205    iter.index=iter.length;
1206    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, nfkd, sizeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4);
1207    iter.index=iter.length;
1208    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, nfc, sizeof(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4);
1209    iter.index=iter.length;
1210    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, nfkc, sizeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4);
1211    iter.index=iter.length;
1212    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, fcd, sizeof(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4);
1213
1214    /* test iteration without doNormalize */
1215    iter.index=0;
1216    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1217    iter.index=0;
1218    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1219    iter.index=0;
1220    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1221    iter.index=0;
1222    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1223    iter.index=0;
1224    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1225
1226    iter.index=iter.length;
1227    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1228    iter.index=iter.length;
1229    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1230    iter.index=iter.length;
1231    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1232    iter.index=iter.length;
1233    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1234    iter.index=iter.length;
1235    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1236
1237    /* try without neededToNormalize */
1238    errorCode=U_ZERO_ERROR;
1239    buffer[0]=5;
1240    iter.index=1;
1241    length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1242                      UNORM_NFD, 0, TRUE, NULL,
1243                      &errorCode);
1244    if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[3]) {
1245        log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode));
1246        return;
1247    }
1248
1249    /* preflight */
1250    neededToNormalize=9;
1251    iter.index=1;
1252    length=unorm_next(&iter, NULL, 0,
1253                      UNORM_NFD, 0, TRUE, &neededToNormalize,
1254                      &errorCode);
1255    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2) {
1256        log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode));
1257        return;
1258    }
1259
1260    errorCode=U_ZERO_ERROR;
1261    buffer[0]=buffer[1]=5;
1262    neededToNormalize=9;
1263    iter.index=1;
1264    length=unorm_next(&iter, buffer, 1,
1265                      UNORM_NFD, 0, TRUE, &neededToNormalize,
1266                      &errorCode);
1267    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2 || buffer[1]!=5) {
1268        log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode));
1269        return;
1270    }
1271
1272    /* no iterator */
1273    errorCode=U_ZERO_ERROR;
1274    buffer[0]=buffer[1]=5;
1275    neededToNormalize=9;
1276    iter.index=1;
1277    length=unorm_next(NULL, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1278                      UNORM_NFD, 0, TRUE, &neededToNormalize,
1279                      &errorCode);
1280    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1281        log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode));
1282        return;
1283    }
1284
1285    /* illegal mode */
1286    buffer[0]=buffer[1]=5;
1287    neededToNormalize=9;
1288    iter.index=1;
1289    length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1290                      (UNormalizationMode)0, 0, TRUE, &neededToNormalize,
1291                      &errorCode);
1292    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1293        log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode));
1294        return;
1295    }
1296
1297    /* error coming in */
1298    errorCode=U_MISPLACED_QUANTIFIER;
1299    buffer[0]=5;
1300    iter.index=1;
1301    length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1302                      UNORM_NFD, 0, TRUE, NULL,
1303                      &errorCode);
1304    if(errorCode!=U_MISPLACED_QUANTIFIER) {
1305        log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode));
1306        return;
1307    }
1308}
1309
1310static void
1311TestFCNFKCClosure(void) {
1312    static const struct {
1313        UChar32 c;
1314        const UChar s[6];
1315    } tests[]={
1316        { 0x00C4, { 0 } },
1317        { 0x00E4, { 0 } },
1318        { 0x037A, { 0x0020, 0x03B9, 0 } },
1319        { 0x03D2, { 0x03C5, 0 } },
1320        { 0x20A8, { 0x0072, 0x0073, 0 } },
1321        { 0x210B, { 0x0068, 0 } },
1322        { 0x210C, { 0x0068, 0 } },
1323        { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1324        { 0x2122, { 0x0074, 0x006D, 0 } },
1325        { 0x2128, { 0x007A, 0 } },
1326        { 0x1D5DB, { 0x0068, 0 } },
1327        { 0x1D5ED, { 0x007A, 0 } },
1328        { 0x0061, { 0 } }
1329    };
1330
1331    UChar buffer[8];
1332    UErrorCode errorCode;
1333    int32_t i, length;
1334
1335    for(i=0; i<LENGTHOF(tests); ++i) {
1336        errorCode=U_ZERO_ERROR;
1337        length=u_getFC_NFKC_Closure(tests[i].c, buffer, LENGTHOF(buffer), &errorCode);
1338        if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests[i].s, buffer)) {
1339            log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests[i].c, u_errorName(errorCode));
1340        }
1341    }
1342
1343    /* error handling */
1344    errorCode=U_ZERO_ERROR;
1345    length=u_getFC_NFKC_Closure(0x5c, NULL, LENGTHOF(buffer), &errorCode);
1346    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1347        log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode));
1348    }
1349
1350    length=u_getFC_NFKC_Closure(0x5c, buffer, LENGTHOF(buffer), &errorCode);
1351    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1352        log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode));
1353    }
1354}
1355
1356static void
1357TestQuickCheckPerCP() {
1358    UErrorCode errorCode;
1359    UChar32 c, lead, trail;
1360    UChar s[U16_MAX_LENGTH], nfd[16];
1361    int32_t length, lccc1, lccc2, tccc1, tccc2;
1362    int32_t qc1, qc2;
1363
1364    if(
1365        u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1366        u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1367        u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1368        u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1369        u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) ||
1370        u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS)
1371    ) {
1372        log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
1373    }
1374
1375    /*
1376     * compare the quick check property values for some code points
1377     * to the quick check results for checking same-code point strings
1378     */
1379    errorCode=U_ZERO_ERROR;
1380    c=0;
1381    while(c<0x110000) {
1382        length=0;
1383        U16_APPEND_UNSAFE(s, length, c);
1384
1385        qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK);
1386        qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode);
1387        if(qc1!=qc2) {
1388            log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1389        }
1390
1391        qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK);
1392        qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode);
1393        if(qc1!=qc2) {
1394            log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1395        }
1396
1397        qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK);
1398        qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode);
1399        if(qc1!=qc2) {
1400            log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1401        }
1402
1403        qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK);
1404        qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode);
1405        if(qc1!=qc2) {
1406            log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1407        }
1408
1409        length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, LENGTHOF(nfd), &errorCode);
1410        /* length-length == 0 is used to get around a compiler warning. */
1411        U16_GET(nfd, 0, length-length, length, lead);
1412        U16_GET(nfd, 0, length-1, length, trail);
1413
1414        lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS);
1415        lccc2=u_getCombiningClass(lead);
1416        tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
1417        tccc2=u_getCombiningClass(trail);
1418
1419        if(lccc1!=lccc2) {
1420            log_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
1421                    lccc1, lccc2, c);
1422        }
1423        if(tccc1!=tccc2) {
1424            log_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
1425                    tccc1, tccc2, c);
1426        }
1427
1428        /* skip some code points */
1429        c=(20*c)/19+1;
1430    }
1431}
1432
1433static void
1434TestComposition(void) {
1435    static const struct {
1436        UNormalizationMode mode;
1437        uint32_t options;
1438        UChar input[12];
1439        UChar expect[12];
1440    } cases[]={
1441        /*
1442         * special cases for UAX #15 bug
1443         * see Unicode Corrigendum #5: Normalization Idempotency
1444         * at http://unicode.org/versions/corrigendum5.html
1445         * (was Public Review Issue #29)
1446         */
1447        { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 },         { 0x1100, 0x0300, 0x1161, 0x0327 } },
1448        { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
1449        { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 },         { 0xac00, 0x0327, 0x0300, 0x11a8 } },
1450        { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e },                 { 0x0b47, 0x0300, 0x0b3e } },
1451
1452        /* TODO: add test cases for UNORM_FCC here (j2151) */
1453    };
1454
1455    UChar output[16];
1456    UErrorCode errorCode;
1457    int32_t i, length;
1458
1459    for(i=0; i<LENGTHOF(cases); ++i) {
1460        errorCode=U_ZERO_ERROR;
1461        length=unorm_normalize(
1462                    cases[i].input, -1,
1463                    cases[i].mode, cases[i].options,
1464                    output, LENGTHOF(output),
1465                    &errorCode);
1466        if( U_FAILURE(errorCode) ||
1467            length!=u_strlen(cases[i].expect) ||
1468            0!=u_memcmp(output, cases[i].expect, length)
1469        ) {
1470            log_data_err("unexpected result for case %d - (Are you missing data?)\n", i);
1471        }
1472    }
1473}
1474
1475static void
1476TestGetDecomposition() {
1477    UChar decomp[32];
1478    int32_t length;
1479
1480    UErrorCode errorCode=U_ZERO_ERROR;
1481    const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, &errorCode);
1482    if(U_FAILURE(errorCode)) {
1483        log_err_status(errorCode, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode));
1484        return;
1485    }
1486
1487    length=unorm2_getDecomposition(n2, 0x20, decomp, LENGTHOF(decomp), &errorCode);
1488    if(U_FAILURE(errorCode) || length>=0) {
1489        log_err("unorm2_getDecomposition(fcc, space) failed\n");
1490    }
1491    errorCode=U_ZERO_ERROR;
1492    length=unorm2_getDecomposition(n2, 0xe4, decomp, LENGTHOF(decomp), &errorCode);
1493    if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1494        log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n");
1495    }
1496    errorCode=U_ZERO_ERROR;
1497    length=unorm2_getDecomposition(n2, 0xac01, decomp, LENGTHOF(decomp), &errorCode);
1498    if(U_FAILURE(errorCode) || length!=3 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0x11a8 || decomp[3]!=0) {
1499        log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n");
1500    }
1501    errorCode=U_ZERO_ERROR;
1502    length=unorm2_getDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1503    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
1504        log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow failed\n");
1505    }
1506    errorCode=U_ZERO_ERROR;
1507    length=unorm2_getDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1508    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1509        log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n");
1510    }
1511    errorCode=U_ZERO_ERROR;
1512    length=unorm2_getDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1513    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1514        log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n");
1515    }
1516}
1517
1518static void
1519TestGetRawDecomposition() {
1520    UChar decomp[32];
1521    int32_t length;
1522
1523    UErrorCode errorCode=U_ZERO_ERROR;
1524    const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode);
1525    if(U_FAILURE(errorCode)) {
1526        log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1527        return;
1528    }
1529    /*
1530     * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
1531     * without recursive decomposition.
1532     */
1533
1534    length=unorm2_getRawDecomposition(n2, 0x20, decomp, LENGTHOF(decomp), &errorCode);
1535    if(U_FAILURE(errorCode) || length>=0) {
1536        log_err("unorm2_getDecomposition(nfkc, space) failed\n");
1537    }
1538    errorCode=U_ZERO_ERROR;
1539    length=unorm2_getRawDecomposition(n2, 0xe4, decomp, LENGTHOF(decomp), &errorCode);
1540    if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1541        log_err("unorm2_getDecomposition(nfkc, a-umlaut) failed\n");
1542    }
1543    /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
1544    errorCode=U_ZERO_ERROR;
1545    length=unorm2_getRawDecomposition(n2, 0x1e08, decomp, LENGTHOF(decomp), &errorCode);
1546    if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xc7 || decomp[1]!=0x301 || decomp[2]!=0) {
1547        log_err("unorm2_getDecomposition(nfkc, c-cedilla-acute) failed\n");
1548    }
1549    /* U+212B ANGSTROM SIGN */
1550    errorCode=U_ZERO_ERROR;
1551    length=unorm2_getRawDecomposition(n2, 0x212b, decomp, LENGTHOF(decomp), &errorCode);
1552    if(U_FAILURE(errorCode) || length!=1 || decomp[0]!=0xc5 || decomp[1]!=0) {
1553        log_err("unorm2_getDecomposition(nfkc, angstrom sign) failed\n");
1554    }
1555    errorCode=U_ZERO_ERROR;
1556    length=unorm2_getRawDecomposition(n2, 0xac00, decomp, LENGTHOF(decomp), &errorCode);
1557    if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0) {
1558        log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC00) failed\n");
1559    }
1560    /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */
1561    errorCode=U_ZERO_ERROR;
1562    length=unorm2_getRawDecomposition(n2, 0xac01, decomp, LENGTHOF(decomp), &errorCode);
1563    if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xac00 || decomp[1]!=0x11a8 || decomp[2]!=0) {
1564        log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) failed\n");
1565    }
1566    errorCode=U_ZERO_ERROR;
1567    length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1568    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=2) {
1569        log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) overflow failed\n");
1570    }
1571    errorCode=U_ZERO_ERROR;
1572    length=unorm2_getRawDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1573    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1574        log_err("unorm2_getDecomposition(nfkc, capacity<0) failed\n");
1575    }
1576    errorCode=U_ZERO_ERROR;
1577    length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1578    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1579        log_err("unorm2_getDecomposition(nfkc, decomposition=NULL) failed\n");
1580    }
1581}
1582
1583static void
1584TestAppendRestoreMiddle() {
1585    UChar a[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 };  /* last chars are 'A' and 'cedilla' NFC */
1586    static const UChar b[]={ 0x30A, 0x64, 0x65, 0x66, 0 };  /* first char is 'ring above' NFC */
1587    /* NFC: C5 is 'A with ring above' */
1588    static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 };
1589    int32_t length;
1590    UErrorCode errorCode=U_ZERO_ERROR;
1591    const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1592    if(U_FAILURE(errorCode)) {
1593        log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1594        return;
1595    }
1596    /*
1597     * Use length=-1 to fool the estimate of the ReorderingBuffer capacity.
1598     * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A>
1599     * still fits into a[] but the full result still overflows this capacity.
1600     * (Let it modify the destination buffer before reallocating internally.)
1601     */
1602    length=unorm2_append(n2, a, -1, 6, b, -1, &errorCode);
1603    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=LENGTHOF(expected)) {
1604        log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)length);
1605        return;
1606    }
1607    /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */
1608    if(a[0]!=0x61 || a[1]!=0x62 || a[2]!=0x63 || a[3]!=0x41 || a[4]!=0x327 || a[5]!=0) {
1609        log_err("unorm2_append(overflow) modified the first string\n");
1610        return;
1611    }
1612    errorCode=U_ZERO_ERROR;
1613    length=unorm2_append(n2, a, -1, LENGTHOF(a), b, -1, &errorCode);
1614    if(U_FAILURE(errorCode) || length!=LENGTHOF(expected) || 0!=u_memcmp(a, expected, length)) {
1615        log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(errorCode), (int)length);
1616        return;
1617    }
1618}
1619
1620static void
1621TestGetEasyToUseInstance() {
1622    static const UChar in[]={
1623        0xA0,  /* -> <noBreak> 0020 */
1624        0xC7, 0x301  /* = 1E08 = 0043 0327 0301 */
1625    };
1626    UChar out[32];
1627    int32_t length;
1628
1629    UErrorCode errorCode=U_ZERO_ERROR;
1630    const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1631    if(U_FAILURE(errorCode)) {
1632        log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1633        return;
1634    }
1635    length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode);
1636    if(U_FAILURE(errorCode) || length!=2 || out[0]!=0xa0 || out[1]!=0x1e08) {
1637        log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n",
1638                (int)length, u_errorName(errorCode));
1639    }
1640
1641    errorCode=U_ZERO_ERROR;
1642    n2=unorm2_getNFDInstance(&errorCode);
1643    if(U_FAILURE(errorCode)) {
1644        log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode));
1645        return;
1646    }
1647    length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode);
1648    if(U_FAILURE(errorCode) || length!=4 || out[0]!=0xa0 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1649        log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n",
1650                (int)length, u_errorName(errorCode));
1651    }
1652
1653    errorCode=U_ZERO_ERROR;
1654    n2=unorm2_getNFKCInstance(&errorCode);
1655    if(U_FAILURE(errorCode)) {
1656        log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1657        return;
1658    }
1659    length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode);
1660    if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e08) {
1661        log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n",
1662                (int)length, u_errorName(errorCode));
1663    }
1664
1665    errorCode=U_ZERO_ERROR;
1666    n2=unorm2_getNFKDInstance(&errorCode);
1667    if(U_FAILURE(errorCode)) {
1668        log_err_status(errorCode, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode));
1669        return;
1670    }
1671    length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode);
1672    if(U_FAILURE(errorCode) || length!=4 || out[0]!=0x20 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1673        log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n",
1674                (int)length, u_errorName(errorCode));
1675    }
1676
1677    errorCode=U_ZERO_ERROR;
1678    n2=unorm2_getNFKCCasefoldInstance(&errorCode);
1679    if(U_FAILURE(errorCode)) {
1680        log_err_status(errorCode, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode));
1681        return;
1682    }
1683    length=unorm2_normalize(n2, in, LENGTHOF(in), out, LENGTHOF(out), &errorCode);
1684    if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e09) {
1685        log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n",
1686                (int)length, u_errorName(errorCode));
1687    }
1688}
1689
1690#endif /* #if !UCONFIG_NO_NORMALIZATION */
1691