1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/********************************************************************************
7*
8* File CNORMTST.C
9*
10* Modification History:
11*        Name                     Description
12*     Madhu Katragadda            Ported for C API
13*     synwee                      added test for quick check
14*     synwee                      added test for checkFCD
15*********************************************************************************/
16/*tests for u_normalization*/
17#include "unicode/utypes.h"
18#include "unicode/unorm.h"
19#include "unicode/utf16.h"
20#include "cintltst.h"
21#include "cmemory.h"
22
23#if !UCONFIG_NO_NORMALIZATION
24
25#include <stdlib.h>
26#include <time.h>
27#include "unicode/uchar.h"
28#include "unicode/ustring.h"
29#include "unicode/unorm.h"
30#include "cnormtst.h"
31
/* File-local tests; each one is registered with the test tree in addNormTest(). */
static void TestAPI(void);
static void TestNormCoverage(void);
static void TestConcatenate(void);
static void TestNextPrevious(void);
static void TestIsNormalized(void);
static void TestFCNFKCClosure(void);
static void TestQuickCheckPerCP(void);
static void TestComposition(void);
static void TestFCD(void);
static void TestGetDecomposition(void);
static void TestGetRawDecomposition(void);
static void TestAppendRestoreMiddle(void);
static void TestGetEasyToUseInstance(void);
66
/*
 * Canonical normalization cases, one row per case:
 *   [0] input, [1] expected canonical decomposition, [2] expected canonical composition.
 * The tests in this file pass these strings through CharsToUChars() before use.
 * The last row is three empty strings; UPRV_LENGTHOF() counts it, so it is run as a case too.
 */
static const char* const canonTests[][3] = {
    /* Input */                   /* Decomposed */              /* Composed */
    { "cat",                      "cat",                        "cat"                      },
    { "\\u00e0ardvark",           "a\\u0300ardvark",            "\\u00e0ardvark"           },

    { "\\u1e0a",                  "D\\u0307",                   "\\u1e0a"                  }, /* D-dot_above */
    { "D\\u0307",                 "D\\u0307",                   "\\u1e0a"                  }, /* D dot_above */

    { "\\u1e0c\\u0307",           "D\\u0323\\u0307",            "\\u1e0c\\u0307"           }, /* D-dot_below dot_above */
    { "\\u1e0a\\u0323",           "D\\u0323\\u0307",            "\\u1e0c\\u0307"           }, /* D-dot_above dot_below */
    { "D\\u0307\\u0323",          "D\\u0323\\u0307",            "\\u1e0c\\u0307"           }, /* D dot_above dot_below */

    { "\\u1e10\\u0307\\u0323",    "D\\u0327\\u0323\\u0307",     "\\u1e10\\u0323\\u0307"    }, /* D-cedilla dot_above dot_below */
    { "D\\u0307\\u0328\\u0323",   "D\\u0328\\u0323\\u0307",     "\\u1e0c\\u0328\\u0307"    }, /* D dot_above ogonek dot_below */

    { "\\u1E14",                  "E\\u0304\\u0300",            "\\u1E14"                  }, /* E-macron-grave */
    { "\\u0112\\u0300",           "E\\u0304\\u0300",            "\\u1E14"                  }, /* E-macron + grave */
    { "\\u00c8\\u0304",           "E\\u0300\\u0304",            "\\u00c8\\u0304"           }, /* E-grave + macron */

    { "\\u212b",                  "A\\u030a",                   "\\u00c5"                  }, /* angstrom_sign */
    { "\\u00c5",                  "A\\u030a",                   "\\u00c5"                  }, /* A-ring */

    /* the ffi ligature U+FB03 is left alone by the canonical forms */
    { "\\u00C4ffin",              "A\\u0308ffin",               "\\u00C4ffin"              },
    { "\\u00C4\\uFB03n",          "A\\u0308\\uFB03n",           "\\u00C4\\uFB03n"          },

    /* likewise U+2163 stays as it is */
    { "Henry IV",                 "Henry IV",                   "Henry IV"                 },
    { "Henry \\u2163",            "Henry \\u2163",              "Henry \\u2163"            },

    { "\\u30AC",                  "\\u30AB\\u3099",             "\\u30AC"                  }, /* ga (Katakana) */
    { "\\u30AB\\u3099",           "\\u30AB\\u3099",             "\\u30AC"                  }, /* ka + ten */
    { "\\uFF76\\uFF9E",           "\\uFF76\\uFF9E",             "\\uFF76\\uFF9E"           }, /* hw_ka + hw_ten */
    { "\\u30AB\\uFF9E",           "\\u30AB\\uFF9E",             "\\u30AB\\uFF9E"           }, /* ka + hw_ten */
    { "\\uFF76\\u3099",           "\\uFF76\\u3099",             "\\uFF76\\u3099"           }, /* hw_ka + ten */
    { "A\\u0300\\u0316",          "A\\u0316\\u0300",            "\\u00C0\\u0316"           }, /* A + U+0300 + U+0316: marks get reordered */
    { "", "", "" }
};
103
/*
 * Compatibility normalization cases, one row per case:
 *   [0] input, [1] expected compatibility decomposition, [2] expected compatibility composition.
 * The last row is three empty strings; UPRV_LENGTHOF() counts it, so it is run as a case too.
 */
static const char* const compatTests[][3] = {
    /* Input */                 /* Decomposed */            /* Composed */
    { "cat",                    "cat",                      "cat"               },

    { "\\uFB4f",                "\\u05D0\\u05DC",           "\\u05D0\\u05DC"    }, /* Alef-Lamed vs. Alef, Lamed */

    { "\\u00C4ffin",            "A\\u0308ffin",             "\\u00C4ffin"       },
    { "\\u00C4\\uFB03n",        "A\\u0308ffin",             "\\u00C4ffin"       }, /* ffi ligature -> f + f + i */

    { "Henry IV",               "Henry IV",                 "Henry IV"          },
    { "Henry \\u2163",          "Henry IV",                 "Henry IV"          },

    { "\\u30AC",                "\\u30AB\\u3099",           "\\u30AC"           }, /* ga (Katakana) */
    { "\\u30AB\\u3099",         "\\u30AB\\u3099",           "\\u30AC"           }, /* ka + ten */

    { "\\uFF76\\u3099",         "\\u30AB\\u3099",           "\\u30AC"           }, /* hw_ka + ten */

    /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later */
    { "\\uFF76\\uFF9E",         "\\u30AB\\u3099",           "\\u30AC"           }, /* hw_ka + hw_ten */
    { "\\u30AB\\uFF9E",         "\\u30AB\\u3099",           "\\u30AC"           }, /* ka + hw_ten */
    { "", "", "" }
};
126
/*
 * FCD cases: [0] input, [1] expected output. Column [2] is NULL; TestNormCases()
 * only reads it for the NFC and NFKC modes, and this table is run with UNORM_FCD.
 */
static const char* const fcdTests[][3] = {
    /* Added for testing the below-U+0300 prefix of a NUL-terminated string. */
    { "\\u010e\\u0327",     "D\\u0327\\u030c",      NULL },     /* D-caron + cedilla */
    { "\\u010e",            "\\u010e",              NULL }      /* D-caron */
};
132
133void addNormTest(TestNode** root);
134
135void addNormTest(TestNode** root)
136{
137    addTest(root, &TestAPI, "tsnorm/cnormtst/TestAPI");
138    addTest(root, &TestDecomp, "tsnorm/cnormtst/TestDecomp");
139    addTest(root, &TestCompatDecomp, "tsnorm/cnormtst/TestCompatDecomp");
140    addTest(root, &TestCanonDecompCompose, "tsnorm/cnormtst/TestCanonDecompCompose");
141    addTest(root, &TestCompatDecompCompose, "tsnorm/cnormtst/TestCompatDecompCompose");
142    addTest(root, &TestFCD, "tsnorm/cnormtst/TestFCD");
143    addTest(root, &TestNull, "tsnorm/cnormtst/TestNull");
144    addTest(root, &TestQuickCheck, "tsnorm/cnormtst/TestQuickCheck");
145    addTest(root, &TestQuickCheckPerCP, "tsnorm/cnormtst/TestQuickCheckPerCP");
146    addTest(root, &TestIsNormalized, "tsnorm/cnormtst/TestIsNormalized");
147    addTest(root, &TestCheckFCD, "tsnorm/cnormtst/TestCheckFCD");
148    addTest(root, &TestNormCoverage, "tsnorm/cnormtst/TestNormCoverage");
149    addTest(root, &TestConcatenate, "tsnorm/cnormtst/TestConcatenate");
150    addTest(root, &TestNextPrevious, "tsnorm/cnormtst/TestNextPrevious");
151    addTest(root, &TestFCNFKCClosure, "tsnorm/cnormtst/TestFCNFKCClosure");
152    addTest(root, &TestComposition, "tsnorm/cnormtst/TestComposition");
153    addTest(root, &TestGetDecomposition, "tsnorm/cnormtst/TestGetDecomposition");
154    addTest(root, &TestGetRawDecomposition, "tsnorm/cnormtst/TestGetRawDecomposition");
155    addTest(root, &TestAppendRestoreMiddle, "tsnorm/cnormtst/TestAppendRestoreMiddle");
156    addTest(root, &TestGetEasyToUseInstance, "tsnorm/cnormtst/TestGetEasyToUseInstance");
157}
158
/*
 * Printable mode names for log messages, indexed directly by the
 * UNormalizationMode value (TestNormCases() uses modeStrings[mode]).
 *
 * NOTE(review): per unicode/unorm.h the enum starts at UNORM_NONE == 1, not 0
 * (confirm against the header in use). Slot 0 is therefore a placeholder;
 * without it every name is reported one mode too late, e.g. an NFD run is
 * logged as "UNORM_NFKD" and an FCD run as "UNORM_MODE_COUNT".
 */
static const char* const modeStrings[]={
    "(no mode has value 0)",
    "UNORM_NONE",
    "UNORM_NFD",
    "UNORM_NFKD",
    "UNORM_NFC",
    "UNORM_NFKC",
    "UNORM_FCD",
    "UNORM_MODE_COUNT"
};
168
169static void TestNormCases(UNormalizationMode mode,
170                          const char* const cases[][3], int32_t lengthOfCases) {
171    int32_t x, neededLen, length2;
172    int32_t expIndex= (mode==UNORM_NFC || mode==UNORM_NFKC) ? 2 : 1;
173    UChar *source=NULL;
174    UChar result[16];
175    log_verbose("Testing unorm_normalize(%s)\n", modeStrings[mode]);
176    for(x=0; x < lengthOfCases; x++)
177    {
178        UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
179        source=CharsToUChars(cases[x][0]);
180        neededLen= unorm_normalize(source, u_strlen(source), mode, 0, NULL, 0, &status);
181        length2= unorm_normalize(source, -1, mode, 0, NULL, 0, &status2);
182        if(neededLen!=length2) {
183          log_err("ERROR in unorm_normalize(%s)[%d]: "
184                  "preflight length/NUL %d!=%d preflight length/srcLength\n",
185                  modeStrings[mode], (int)x, (int)neededLen, (int)length2);
186        }
187        if(status==U_BUFFER_OVERFLOW_ERROR)
188        {
189            status=U_ZERO_ERROR;
190        }
191        length2=unorm_normalize(source, u_strlen(source), mode, 0, result, UPRV_LENGTHOF(result), &status);
192        if(U_FAILURE(status) || neededLen!=length2) {
193            log_data_err("ERROR in unorm_normalize(%s/NUL) at %s:  %s - (Are you missing data?)\n",
194                         modeStrings[mode], austrdup(source), myErrorName(status));
195        } else {
196            assertEqual(result, cases[x][expIndex], x);
197        }
198        length2=unorm_normalize(source, -1, mode, 0, result, UPRV_LENGTHOF(result), &status);
199        if(U_FAILURE(status) || neededLen!=length2) {
200            log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s:  %s - (Are you missing data?)\n",
201                         modeStrings[mode], austrdup(source), myErrorName(status));
202        } else {
203            assertEqual(result, cases[x][expIndex], x);
204        }
205        free(source);
206    }
207}
208
209void TestDecomp() {
210    TestNormCases(UNORM_NFD, canonTests, UPRV_LENGTHOF(canonTests));
211}
212
213void TestCompatDecomp() {
214    TestNormCases(UNORM_NFKD, compatTests, UPRV_LENGTHOF(compatTests));
215}
216
217void TestCanonDecompCompose() {
218    TestNormCases(UNORM_NFC, canonTests, UPRV_LENGTHOF(canonTests));
219}
220
221void TestCompatDecompCompose() {
222    TestNormCases(UNORM_NFKC, compatTests, UPRV_LENGTHOF(compatTests));
223}
224
225void TestFCD() {
226    TestNormCases(UNORM_FCD, fcdTests, UPRV_LENGTHOF(fcdTests));
227}
228
229static void assertEqual(const UChar* result, const char* expected, int32_t index)
230{
231    UChar *expectedUni = CharsToUChars(expected);
232    if(u_strcmp(result, expectedUni)!=0){
233        log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index, expected,
234            austrdup(result) );
235    }
236    free(expectedUni);
237}
238
239static void TestNull_check(UChar *src, int32_t srcLen,
240                    UChar *exp, int32_t expLen,
241                    UNormalizationMode mode,
242                    const char *name)
243{
244    UErrorCode status = U_ZERO_ERROR;
245    int32_t len, i;
246
247    UChar   result[50];
248
249
250    status = U_ZERO_ERROR;
251
252    for(i=0;i<50;i++)
253      {
254        result[i] = 0xFFFD;
255      }
256
257    len = unorm_normalize(src, srcLen, mode, 0, result, 50, &status);
258
259    if(U_FAILURE(status)) {
260      log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name, u_errorName(status));
261    } else if (len != expLen) {
262      log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name, expLen, len);
263    }
264
265    {
266      for(i=0;i<len;i++){
267        if(exp[i] != result[i]) {
268          log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
269                  name,
270                  i,
271                  exp[i],
272                  result[i]);
273          return;
274        }
275        log_verbose("     %d: \\u%04X\n", i, result[i]);
276      }
277    }
278
279    log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name);
280}
281
282void TestNull()
283{
284
285    UChar   source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
286    int32_t source_comp_len = 4;
287    UChar   expect_comp[] = { 0x0061, 0x0000, 0x1e0a };
288    int32_t expect_comp_len = 3;
289
290    UChar   source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 };
291    int32_t source_dcmp_len = 3;
292    UChar   expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
293    int32_t expect_dcmp_len = 5;
294
295    TestNull_check(source_comp,
296                   source_comp_len,
297                   expect_comp,
298                   expect_comp_len,
299                   UNORM_NFC,
300                   "UNORM_NFC");
301
302    TestNull_check(source_dcmp,
303                   source_dcmp_len,
304                   expect_dcmp,
305                   expect_dcmp_len,
306                   UNORM_NFD,
307                   "UNORM_NFD");
308
309    TestNull_check(source_comp,
310                   source_comp_len,
311                   expect_comp,
312                   expect_comp_len,
313                   UNORM_NFKC,
314                   "UNORM_NFKC");
315
316
317}
318
319static void TestQuickCheckResultNO()
320{
321  const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
322                         0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
323  const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
324                          0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
325  const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
326                           0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
327  const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
328                           0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
329
330
331  const int SIZE = 10;
332
333  int count = 0;
334  UErrorCode error = U_ZERO_ERROR;
335
336  for (; count < SIZE; count ++)
337  {
338    if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
339                                                              UNORM_NO)
340    {
341      log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
342      return;
343    }
344    if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
345                                                              UNORM_NO)
346    {
347      log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
348      return;
349    }
350    if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
351                                                              UNORM_NO)
352    {
353      log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
354      return;
355    }
356    if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
357                                                              UNORM_NO)
358    {
359      log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
360      return;
361    }
362  }
363}
364
365
366static void TestQuickCheckResultYES()
367{
368  const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
369                         0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
370  const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
371                         0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
372  const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
373                          0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
374  const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
375                          0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
376
377  const int SIZE = 10;
378  int count = 0;
379  UErrorCode error = U_ZERO_ERROR;
380
381  UChar cp = 0;
382  while (cp < 0xA0)
383  {
384    if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES)
385    {
386      log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp);
387      return;
388    }
389    if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) !=
390                                                             UNORM_YES)
391    {
392      log_err("ERROR in NFC quick check at U+%04x\n", cp);
393      return;
394    }
395    if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES)
396    {
397      log_data_err("ERROR in NFKD quick check at U+%04x\n", cp);
398      return;
399    }
400    if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) !=
401                                                             UNORM_YES)
402    {
403      log_err("ERROR in NFKC quick check at U+%04x\n", cp);
404      return;
405    }
406    cp ++;
407  }
408
409  for (; count < SIZE; count ++)
410  {
411    if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
412                                                             UNORM_YES)
413    {
414      log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
415      return;
416    }
417    if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error)
418                                                          != UNORM_YES)
419    {
420      log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
421      return;
422    }
423    if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
424                                                             UNORM_YES)
425    {
426      log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
427      return;
428    }
429    if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
430                                                             UNORM_YES)
431    {
432      log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
433      return;
434    }
435  }
436}
437
438static void TestQuickCheckResultMAYBE()
439{
440  const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
441                         0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
442  const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
443                          0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
444
445
446  const int SIZE = 10;
447
448  int count = 0;
449  UErrorCode error = U_ZERO_ERROR;
450
451  /* NFD and NFKD does not have any MAYBE codepoints */
452  for (; count < SIZE; count ++)
453  {
454    if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
455                                                           UNORM_MAYBE)
456    {
457      log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC[count]);
458      return;
459    }
460    if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
461                                                           UNORM_MAYBE)
462    {
463      log_data_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
464      return;
465    }
466  }
467}
468
469static void TestQuickCheckStringResult()
470{
471  int count;
472  UChar *d = NULL;
473  UChar *c = NULL;
474  UErrorCode error = U_ZERO_ERROR;
475
476  for (count = 0; count < UPRV_LENGTHOF(canonTests); count ++)
477  {
478    d = CharsToUChars(canonTests[count][1]);
479    c = CharsToUChars(canonTests[count][2]);
480    if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) !=
481                                                            UNORM_YES)
482    {
483      log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count);
484      return;
485    }
486
487    if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) ==
488                                                            UNORM_NO)
489    {
490      log_err("ERROR in NFC quick check for string at count %d\n", count);
491      return;
492    }
493
494    free(d);
495    free(c);
496  }
497
498  for (count = 0; count < UPRV_LENGTHOF(compatTests); count ++)
499  {
500    d = CharsToUChars(compatTests[count][1]);
501    c = CharsToUChars(compatTests[count][2]);
502    if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) !=
503                                                            UNORM_YES)
504    {
505      log_data_err("ERROR in NFKD quick check for string at count %d\n", count);
506      return;
507    }
508
509    if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) !=
510                                                            UNORM_YES)
511    {
512      log_err("ERROR in NFKC quick check for string at count %d\n", count);
513      return;
514    }
515
516    free(d);
517    free(c);
518  }
519}
520
/*
 * Entry point for the unorm_quickCheck() tests: the three single-code-unit
 * groups (NO, YES, MAYBE) first, then the whole-string checks.
 */
void TestQuickCheck(void)
{
  /* single code units */
  TestQuickCheckResultNO();
  TestQuickCheckResultYES();
  TestQuickCheckResultMAYBE();

  /* strings from the canonTests/compatTests tables */
  TestQuickCheckStringResult();
}
528
529/*
530 * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
531 * normalized, and some that are not.
532 * Here we pick some specific cases and test the C API.
533 */
534static void TestIsNormalized(void) {
535    static const UChar notNFC[][8]={            /* strings that are not in NFC */
536        { 0x62, 0x61, 0x300, 0x63, 0 },         /* 0061 0300 compose */
537        { 0xfb1d, 0 },                          /* excluded from composition */
538        { 0x0627, 0x0653, 0 },                  /* 0627 0653 compose */
539        { 0x3071, 0x306f, 0x309a, 0x3073, 0 }   /* 306F 309A compose */
540    };
541    static const UChar notNFKC[][8]={           /* strings that are not in NFKC */
542        { 0x1100, 0x1161, 0 },                  /* Jamo compose */
543        { 0x1100, 0x314f, 0 },                  /* compatibility Jamo compose */
544        { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 }   /* 1F00 0345 compose */
545    };
546
547    int32_t i;
548    UErrorCode errorCode;
549
550    /* API test */
551
552    /* normal case with length>=0 (length -1 used for special cases below) */
553    errorCode=U_ZERO_ERROR;
554    if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
555        log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode));
556    }
557
558    /* incoming U_FAILURE */
559    errorCode=U_TRUNCATED_CHAR_FOUND;
560    (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode);
561    if(errorCode!=U_TRUNCATED_CHAR_FOUND) {
562        log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode));
563    }
564
565    /* NULL source */
566    errorCode=U_ZERO_ERROR;
567    (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode);
568    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
569        log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
570    }
571
572    /* bad length */
573    errorCode=U_ZERO_ERROR;
574    (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode);
575    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
576        log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
577    }
578
579    /* specific cases */
580    for(i=0; i<UPRV_LENGTHOF(notNFC); ++i) {
581        errorCode=U_ZERO_ERROR;
582        if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
583            log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
584        }
585        errorCode=U_ZERO_ERROR;
586        if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
587            log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
588        }
589    }
590    for(i=0; i<UPRV_LENGTHOF(notNFKC); ++i) {
591        errorCode=U_ZERO_ERROR;
592        if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
593            log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
594        }
595    }
596}
597
598void TestCheckFCD()
599{
600  UErrorCode status = U_ZERO_ERROR;
601  static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
602                         0x0A};
603  static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
604                          0x02B9, 0x0314, 0x0315, 0x0316};
605  static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
606                         0x0050, 0x0730, 0x09EE, 0x1E10};
607
608  static const UChar datastr[][5] =
609  { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
610    {0x0061, 0x030A, 0x00E2, 0x0323, 0},
611    {0x0061, 0x0323, 0x00E2, 0x0323, 0},
612    {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
613  static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES};
614
615  static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
616                            0x6a,
617                            0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
618                            0xea,
619                            0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
620                            0x0307, 0x0308, 0x0309, 0x030a,
621                            0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
622                            0x0327, 0x0328, 0x0329, 0x032a,
623                            0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
624                            0x1e07, 0x1e08, 0x1e09, 0x1e0a};
625
626  int count = 0;
627
628  if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES)
629    log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
630  if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO)
631    log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
632  if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES)
633    log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
634
635  if (U_FAILURE(status))
636    log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status));
637
638  while (count < 4)
639  {
640    UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status);
641    if (U_FAILURE(status)) {
642      log_data_err("unorm_quickCheck(FCD) failed: exception occured at data set %d - (Are you missing data?)\n", count);
643      break;
644    }
645    else {
646      if (result[count] != fcdresult) {
647        log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count,
648                 result[count]);
649      }
650    }
651    count ++;
652  }
653
654  /* random checks of long strings */
655  status = U_ZERO_ERROR;
656  srand((unsigned)time( NULL ));
657
658  for (count = 0; count < 50; count ++)
659  {
660    int size = 0;
661    UBool testresult = UNORM_YES;
662    UChar data[20];
663    UChar norm[100];
664    UChar nfd[100];
665    int normsize = 0;
666    int nfdsize = 0;
667
668    while (size != 19) {
669      data[size] = datachar[(rand() * 50) / RAND_MAX];
670      log_verbose("0x%x", data[size]);
671      normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0,
672                                  norm + normsize, 100 - normsize, &status);
673      if (U_FAILURE(status)) {
674        log_data_err("unorm_quickCheck(FCD) failed: exception occured at data generation - (Are you missing data?)\n");
675        break;
676      }
677      size ++;
678    }
679    log_verbose("\n");
680
681    nfdsize = unorm_normalize(data, size, UNORM_NFD, 0,
682                              nfd, 100, &status);
683    if (U_FAILURE(status)) {
684      log_data_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation - (Are you missing data?)\n");
685    }
686
687    if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) {
688      testresult = UNORM_NO;
689    }
690    if (testresult == UNORM_YES) {
691      log_verbose("result UNORM_YES\n");
692    }
693    else {
694      log_verbose("result UNORM_NO\n");
695    }
696
697    if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAILURE(status)) {
698      log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult);
699    }
700  }
701}
702
703static void
704TestAPI() {
705    static const UChar in[]={ 0x68, 0xe4 };
706    UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
707    UErrorCode errorCode;
708    int32_t length;
709
710    /* try preflighting */
711    errorCode=U_ZERO_ERROR;
712    length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode);
713    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
714        log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
715        return;
716    }
717
718    errorCode=U_ZERO_ERROR;
719    length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode);
720    if(U_FAILURE(errorCode)) {
721        log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName(errorCode));
722        return;
723    }
724    if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) {
725        log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]);
726        return;
727    }
728    length=unorm_normalize(NULL, 0, UNORM_NFC, 0, NULL, 0, &errorCode);
729    if(U_FAILURE(errorCode)) {
730        log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
731        return;
732    }
733    length=unorm_normalize(NULL, 0, UNORM_NFC, 0, out, 20, &errorCode);
734    if(U_FAILURE(errorCode)) {
735        log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
736        return;
737    }
738}
739
740/* test cases to improve test code coverage */
/* Code points from which TestNormCoverage() assembles its input string. */
enum {
    /* compatibility Jamo */
    HANGUL_K_KIYEOK      = 0x3131,  /* NFKD->Jamo L U+1100 */
    HANGUL_K_WEO         = 0x315d,  /* NFKD->Jamo V U+116f */
    HANGUL_K_KIYEOK_SIOS = 0x3133,  /* NFKD->Jamo T U+11aa */

    /* conjoining Jamo */
    HANGUL_KIYEOK        = 0x1100,  /* Jamo L U+1100 */
    HANGUL_WEO           = 0x116f,  /* Jamo V U+116f */
    HANGUL_KIYEOK_SIOS   = 0x11aa,  /* Jamo T U+11aa */

    /* precomposed syllables */
    HANGUL_AC00          = 0xac00,                  /* Hangul syllable = Jamo LV U+ac00 */
    HANGUL_SYLLABLE      = HANGUL_AC00+14*28+3,     /* Hangul syllable = U+1100 * U+116f * U+11aa */

    /* supplementary code points (above U+FFFF) */
    MUSICAL_VOID_NOTEHEAD = 0x1d157,
    MUSICAL_HALF_NOTE     = 0x1d15e,    /* NFC/NFD->Notehead+Stem */
    MUSICAL_STEM          = 0x1d165,    /* cc=216 */
    MUSICAL_STACCATO      = 0x1d17c     /* cc=220 */
};
758
759static void
760TestNormCoverage() {
761    UChar input[1000], expect[1000], output[1000];
762    UErrorCode errorCode;
763    int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLength;
764
765    /* create a long and nasty string with NFKC-unsafe characters */
766    inLength=0;
767
768    /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
769    input[inLength++]=HANGUL_KIYEOK;
770    input[inLength++]=HANGUL_WEO;
771    input[inLength++]=HANGUL_KIYEOK_SIOS;
772
773    input[inLength++]=HANGUL_KIYEOK;
774    input[inLength++]=HANGUL_WEO;
775    input[inLength++]=HANGUL_K_KIYEOK_SIOS;
776
777    input[inLength++]=HANGUL_KIYEOK;
778    input[inLength++]=HANGUL_K_WEO;
779    input[inLength++]=HANGUL_KIYEOK_SIOS;
780
781    input[inLength++]=HANGUL_KIYEOK;
782    input[inLength++]=HANGUL_K_WEO;
783    input[inLength++]=HANGUL_K_KIYEOK_SIOS;
784
785    input[inLength++]=HANGUL_K_KIYEOK;
786    input[inLength++]=HANGUL_WEO;
787    input[inLength++]=HANGUL_KIYEOK_SIOS;
788
789    input[inLength++]=HANGUL_K_KIYEOK;
790    input[inLength++]=HANGUL_WEO;
791    input[inLength++]=HANGUL_K_KIYEOK_SIOS;
792
793    input[inLength++]=HANGUL_K_KIYEOK;
794    input[inLength++]=HANGUL_K_WEO;
795    input[inLength++]=HANGUL_KIYEOK_SIOS;
796
797    input[inLength++]=HANGUL_K_KIYEOK;
798    input[inLength++]=HANGUL_K_WEO;
799    input[inLength++]=HANGUL_K_KIYEOK_SIOS;
800
801    /* Hangul LV with normal/compatibility Jamo T */
802    input[inLength++]=HANGUL_AC00;
803    input[inLength++]=HANGUL_KIYEOK_SIOS;
804
805    input[inLength++]=HANGUL_AC00;
806    input[inLength++]=HANGUL_K_KIYEOK_SIOS;
807
808    /* compatibility Jamo L, V */
809    input[inLength++]=HANGUL_K_KIYEOK;
810    input[inLength++]=HANGUL_K_WEO;
811
812    hangulPrefixLength=inLength;
813
814    input[inLength++]=U16_LEAD(MUSICAL_HALF_NOTE);
815    input[inLength++]=U16_TRAIL(MUSICAL_HALF_NOTE);
816    for(i=0; i<200; ++i) {
817        input[inLength++]=U16_LEAD(MUSICAL_STACCATO);
818        input[inLength++]=U16_TRAIL(MUSICAL_STACCATO);
819        input[inLength++]=U16_LEAD(MUSICAL_STEM);
820        input[inLength++]=U16_TRAIL(MUSICAL_STEM);
821    }
822
823    /* (compatibility) Jamo L, T do not compose */
824    input[inLength++]=HANGUL_K_KIYEOK;
825    input[inLength++]=HANGUL_K_KIYEOK_SIOS;
826
827    /* quick checks */
828    errorCode=U_ZERO_ERROR;
829    if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_FAILURE(errorCode)) {
830        log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
831    }
832    errorCode=U_ZERO_ERROR;
833    if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_FAILURE(errorCode)) {
834        log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
835    }
836    errorCode=U_ZERO_ERROR;
837    if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
838        log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
839    }
840    errorCode=U_ZERO_ERROR;
841    if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
842        log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
843    }
844    errorCode=U_ZERO_ERROR;
845    if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_FAILURE(errorCode)) {
846        log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
847    }
848
849    /* NFKC */
850    expectLength=0;
851    expect[expectLength++]=HANGUL_SYLLABLE;
852
853    expect[expectLength++]=HANGUL_SYLLABLE;
854
855    expect[expectLength++]=HANGUL_SYLLABLE;
856
857    expect[expectLength++]=HANGUL_SYLLABLE;
858
859    expect[expectLength++]=HANGUL_SYLLABLE;
860
861    expect[expectLength++]=HANGUL_SYLLABLE;
862
863    expect[expectLength++]=HANGUL_SYLLABLE;
864
865    expect[expectLength++]=HANGUL_SYLLABLE;
866
867    expect[expectLength++]=HANGUL_AC00+3;
868
869    expect[expectLength++]=HANGUL_AC00+3;
870
871    expect[expectLength++]=HANGUL_AC00+14*28;
872
873    expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
874    expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
875    expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
876    expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
877    for(i=0; i<200; ++i) {
878        expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
879        expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
880    }
881    for(i=0; i<200; ++i) {
882        expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
883        expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
884    }
885
886    expect[expectLength++]=HANGUL_KIYEOK;
887    expect[expectLength++]=HANGUL_KIYEOK_SIOS;
888
889    /* try destination overflow first */
890    errorCode=U_ZERO_ERROR;
891    preflightLength=unorm_normalize(input, inLength,
892                           UNORM_NFKC, 0,
893                           output, 100, /* too short */
894                           &errorCode);
895    if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
896        log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode));
897    }
898
899    /* real NFKC */
900    errorCode=U_ZERO_ERROR;
901    length=unorm_normalize(input, inLength,
902                           UNORM_NFKC, 0,
903                           output, sizeof(output)/U_SIZEOF_UCHAR,
904                           &errorCode);
905    if(U_FAILURE(errorCode)) {
906        log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
907    } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
908        log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
909        for(i=0; i<length; ++i) {
910            if(output[i]!=expect[i]) {
911                log_err("    NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
912                break;
913            }
914        }
915    }
916    if(length!=preflightLength) {
917        log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length, preflightLength);
918    }
919
920    /* FCD */
921    u_memcpy(expect, input, hangulPrefixLength);
922    expectLength=hangulPrefixLength;
923
924    expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
925    expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
926    expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
927    expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
928    for(i=0; i<200; ++i) {
929        expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
930        expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
931    }
932    for(i=0; i<200; ++i) {
933        expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
934        expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
935    }
936
937    expect[expectLength++]=HANGUL_K_KIYEOK;
938    expect[expectLength++]=HANGUL_K_KIYEOK_SIOS;
939
940    errorCode=U_ZERO_ERROR;
941    length=unorm_normalize(input, inLength,
942                           UNORM_FCD, 0,
943                           output, sizeof(output)/U_SIZEOF_UCHAR,
944                           &errorCode);
945    if(U_FAILURE(errorCode)) {
946        log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
947    } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
948        log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
949        for(i=0; i<length; ++i) {
950            if(output[i]!=expect[i]) {
951                log_err("    FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
952                break;
953            }
954        }
955    }
956}
957
958/* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
959static void
960TestConcatenate(void) {
961    /* "re + 'sume'" */
962    static const UChar
963    left[]={
964        0x72, 0x65, 0
965    },
966    right[]={
967        0x301, 0x73, 0x75, 0x6d, 0xe9, 0
968    },
969    expect[]={
970        0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
971    };
972
973    UChar buffer[100];
974    UErrorCode errorCode;
975    int32_t length;
976
977    /* left with length, right NUL-terminated */
978    errorCode=U_ZERO_ERROR;
979    length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
980    if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length)) {
981        log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
982    }
983
984    /* preflighting */
985    errorCode=U_ZERO_ERROR;
986    length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCode);
987    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) {
988        log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
989    }
990
991    buffer[2]=0x5555;
992    errorCode=U_ZERO_ERROR;
993    length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &errorCode);
994    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) {
995        log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
996    }
997
998    /* enter with U_FAILURE */
999    buffer[2]=0xaaaa;
1000    errorCode=U_UNEXPECTED_TOKEN;
1001    length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1002    if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) {
1003        log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length, u_errorName(errorCode));
1004    }
1005
1006    /* illegal arguments */
1007    buffer[2]=0xaaaa;
1008    errorCode=U_ZERO_ERROR;
1009    length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1010    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) {
1011        log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1012    }
1013
1014    errorCode=U_ZERO_ERROR;
1015    length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &errorCode);
1016    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1017        log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1018    }
1019}
1020
/*
 * Separator between expected output pieces in the test strings of
 * TestNextPrevious(); _testIter() splits the expected output at this value.
 */
enum {
    _PLUS=0x2b
};
1024
/*
 * Mode names for log messages, indexed by the numeric UNormalizationMode
 * value (see _testIter()).
 * NOTE(review): the order is assumed to match the enum declared in
 * unicode/unorm.h - confirm when that enum changes.
 */
static const char *const _modeString[UNORM_MODE_COUNT]={
    "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
};
1028
1029static void
1030_testIter(const UChar *src, int32_t srcLength,
1031          UCharIterator *iter, UNormalizationMode mode, UBool forward,
1032          const UChar *out, int32_t outLength,
1033          const int32_t *srcIndexes, int32_t srcIndexesLength) {
1034    UChar buffer[4];
1035    const UChar *expect, *outLimit, *in;
1036    int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength;
1037    UErrorCode errorCode;
1038    UBool neededToNormalize, expectNeeded;
1039
1040    errorCode=U_ZERO_ERROR;
1041    outLimit=out+outLength;
1042    if(forward) {
1043        expect=out;
1044        i=index=0;
1045    } else {
1046        expect=outLimit;
1047        i=srcIndexesLength-2;
1048        index=srcLength;
1049    }
1050
1051    for(;;) {
1052        prevIndex=index;
1053        if(forward) {
1054            if(!iter->hasNext(iter)) {
1055                return;
1056            }
1057            length=unorm_next(iter,
1058                              buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1059                              mode, 0,
1060                              (UBool)(out!=NULL), &neededToNormalize,
1061                              &errorCode);
1062            expectIndex=srcIndexes[i+1];
1063            in=src+prevIndex;
1064            inLength=expectIndex-prevIndex;
1065
1066            if(out!=NULL) {
1067                /* get output piece from between plus signs */
1068                expectLength=0;
1069                while((expect+expectLength)!=outLimit && expect[expectLength]!=_PLUS) {
1070                    ++expectLength;
1071                }
1072                expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1073            } else {
1074                expect=in;
1075                expectLength=inLength;
1076                expectNeeded=FALSE;
1077            }
1078        } else {
1079            if(!iter->hasPrevious(iter)) {
1080                return;
1081            }
1082            length=unorm_previous(iter,
1083                                  buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1084                                  mode, 0,
1085                                  (UBool)(out!=NULL), &neededToNormalize,
1086                                  &errorCode);
1087            expectIndex=srcIndexes[i];
1088            in=src+expectIndex;
1089            inLength=prevIndex-expectIndex;
1090
1091            if(out!=NULL) {
1092                /* get output piece from between plus signs */
1093                expectLength=0;
1094                while(expect!=out && expect[-1]!=_PLUS) {
1095                    ++expectLength;
1096                    --expect;
1097                }
1098                expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1099            } else {
1100                expect=in;
1101                expectLength=inLength;
1102                expectNeeded=FALSE;
1103            }
1104        }
1105        index=iter->getIndex(iter, UITER_CURRENT);
1106
1107        if(U_FAILURE(errorCode)) {
1108            log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n",
1109                    forward, _modeString[mode], i, u_errorName(errorCode));
1110            return;
1111        }
1112        if(expectIndex!=index) {
1113            log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
1114                    forward, _modeString[mode], i, index, expectIndex);
1115            return;
1116        }
1117        if(expectLength!=length) {
1118            log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
1119                    forward, _modeString[mode], i, length, expectLength);
1120            return;
1121        }
1122        if(0!=u_memcmp(expect, buffer, length)) {
1123            log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
1124                    forward, _modeString[mode], i);
1125            return;
1126        }
1127        if(neededToNormalize!=expectNeeded) {
1128        }
1129
1130        if(forward) {
1131            expect+=expectLength+1; /* go after the + */
1132            ++i;
1133        } else {
1134            --expect; /* go before the + */
1135            --i;
1136        }
1137    }
1138}
1139
1140static void
1141TestNextPrevious() {
1142    static const UChar
1143    src[]={ /* input string */
1144        0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1145    },
1146    nfd[]={ /* + separates expected output pieces */
1147        0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x3133
1148    },
1149    nfkd[]={
1150        0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x11aa
1151    },
1152    nfc[]={
1153        0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1154    },
1155    nfkc[]={
1156        0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03
1157    },
1158    fcd[]={
1159        0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1160    };
1161
1162    /* expected iterator indexes in the source string for each iteration piece */
1163    static const int32_t
1164    nfdIndexes[]={
1165        0, 1, 2, 5, 6, 7
1166    },
1167    nfkdIndexes[]={
1168        0, 1, 2, 5, 6, 7
1169    },
1170    nfcIndexes[]={
1171        0, 1, 2, 5, 6, 7
1172    },
1173    nfkcIndexes[]={
1174        0, 1, 2, 5, 7
1175    },
1176    fcdIndexes[]={
1177        0, 1, 2, 5, 6, 7
1178    };
1179
1180    UCharIterator iter;
1181
1182    UChar buffer[4];
1183    int32_t length;
1184
1185    UBool neededToNormalize;
1186    UErrorCode errorCode;
1187
1188    uiter_setString(&iter, src, sizeof(src)/U_SIZEOF_UCHAR);
1189
1190    /* test iteration with doNormalize */
1191    iter.index=0;
1192    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, nfd, sizeof(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4);
1193    iter.index=0;
1194    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, nfkd, sizeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4);
1195    iter.index=0;
1196    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, nfc, sizeof(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4);
1197    iter.index=0;
1198    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, nfkc, sizeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4);
1199    iter.index=0;
1200    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, fcd, sizeof(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4);
1201
1202    iter.index=iter.length;
1203    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, nfd, sizeof(nfd)/U_SIZEOF_UCHAR, nfdIndexes, sizeof(nfdIndexes)/4);
1204    iter.index=iter.length;
1205    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, nfkd, sizeof(nfkd)/U_SIZEOF_UCHAR, nfkdIndexes, sizeof(nfkdIndexes)/4);
1206    iter.index=iter.length;
1207    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, nfc, sizeof(nfc)/U_SIZEOF_UCHAR, nfcIndexes, sizeof(nfcIndexes)/4);
1208    iter.index=iter.length;
1209    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, nfkc, sizeof(nfkc)/U_SIZEOF_UCHAR, nfkcIndexes, sizeof(nfkcIndexes)/4);
1210    iter.index=iter.length;
1211    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, fcd, sizeof(fcd)/U_SIZEOF_UCHAR, fcdIndexes, sizeof(fcdIndexes)/4);
1212
1213    /* test iteration without doNormalize */
1214    iter.index=0;
1215    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, TRUE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1216    iter.index=0;
1217    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, TRUE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1218    iter.index=0;
1219    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, TRUE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1220    iter.index=0;
1221    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, TRUE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1222    iter.index=0;
1223    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, TRUE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1224
1225    iter.index=iter.length;
1226    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFD, FALSE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1227    iter.index=iter.length;
1228    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKD, FALSE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1229    iter.index=iter.length;
1230    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFC, FALSE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1231    iter.index=iter.length;
1232    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_NFKC, FALSE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1233    iter.index=iter.length;
1234    _testIter(src, sizeof(src)/U_SIZEOF_UCHAR, &iter, UNORM_FCD, FALSE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1235
1236    /* try without neededToNormalize */
1237    errorCode=U_ZERO_ERROR;
1238    buffer[0]=5;
1239    iter.index=1;
1240    length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1241                      UNORM_NFD, 0, TRUE, NULL,
1242                      &errorCode);
1243    if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[3]) {
1244        log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode));
1245        return;
1246    }
1247
1248    /* preflight */
1249    neededToNormalize=9;
1250    iter.index=1;
1251    length=unorm_next(&iter, NULL, 0,
1252                      UNORM_NFD, 0, TRUE, &neededToNormalize,
1253                      &errorCode);
1254    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2) {
1255        log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode));
1256        return;
1257    }
1258
1259    errorCode=U_ZERO_ERROR;
1260    buffer[0]=buffer[1]=5;
1261    neededToNormalize=9;
1262    iter.index=1;
1263    length=unorm_next(&iter, buffer, 1,
1264                      UNORM_NFD, 0, TRUE, &neededToNormalize,
1265                      &errorCode);
1266    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2 || buffer[1]!=5) {
1267        log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode));
1268        return;
1269    }
1270
1271    /* no iterator */
1272    errorCode=U_ZERO_ERROR;
1273    buffer[0]=buffer[1]=5;
1274    neededToNormalize=9;
1275    iter.index=1;
1276    length=unorm_next(NULL, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1277                      UNORM_NFD, 0, TRUE, &neededToNormalize,
1278                      &errorCode);
1279    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1280        log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode));
1281        return;
1282    }
1283
1284    /* illegal mode */
1285    buffer[0]=buffer[1]=5;
1286    neededToNormalize=9;
1287    iter.index=1;
1288    length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1289                      (UNormalizationMode)0, 0, TRUE, &neededToNormalize,
1290                      &errorCode);
1291    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1292        log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode));
1293        return;
1294    }
1295
1296    /* error coming in */
1297    errorCode=U_MISPLACED_QUANTIFIER;
1298    buffer[0]=5;
1299    iter.index=1;
1300    length=unorm_next(&iter, buffer, sizeof(buffer)/U_SIZEOF_UCHAR,
1301                      UNORM_NFD, 0, TRUE, NULL,
1302                      &errorCode);
1303    if(errorCode!=U_MISPLACED_QUANTIFIER) {
1304        log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode));
1305        return;
1306    }
1307}
1308
1309static void
1310TestFCNFKCClosure(void) {
1311    static const struct {
1312        UChar32 c;
1313        const UChar s[6];
1314    } tests[]={
1315        { 0x00C4, { 0 } },
1316        { 0x00E4, { 0 } },
1317        { 0x037A, { 0x0020, 0x03B9, 0 } },
1318        { 0x03D2, { 0x03C5, 0 } },
1319        { 0x20A8, { 0x0072, 0x0073, 0 } },
1320        { 0x210B, { 0x0068, 0 } },
1321        { 0x210C, { 0x0068, 0 } },
1322        { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1323        { 0x2122, { 0x0074, 0x006D, 0 } },
1324        { 0x2128, { 0x007A, 0 } },
1325        { 0x1D5DB, { 0x0068, 0 } },
1326        { 0x1D5ED, { 0x007A, 0 } },
1327        { 0x0061, { 0 } }
1328    };
1329
1330    UChar buffer[8];
1331    UErrorCode errorCode;
1332    int32_t i, length;
1333
1334    for(i=0; i<UPRV_LENGTHOF(tests); ++i) {
1335        errorCode=U_ZERO_ERROR;
1336        length=u_getFC_NFKC_Closure(tests[i].c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1337        if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests[i].s, buffer)) {
1338            log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests[i].c, u_errorName(errorCode));
1339        }
1340    }
1341
1342    /* error handling */
1343    errorCode=U_ZERO_ERROR;
1344    length=u_getFC_NFKC_Closure(0x5c, NULL, UPRV_LENGTHOF(buffer), &errorCode);
1345    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1346        log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode));
1347    }
1348
1349    length=u_getFC_NFKC_Closure(0x5c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1350    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1351        log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode));
1352    }
1353}
1354
1355static void
1356TestQuickCheckPerCP() {
1357    UErrorCode errorCode;
1358    UChar32 c, lead, trail;
1359    UChar s[U16_MAX_LENGTH], nfd[16];
1360    int32_t length, lccc1, lccc2, tccc1, tccc2;
1361    int32_t qc1, qc2;
1362
1363    if(
1364        u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1365        u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1366        u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1367        u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1368        u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) ||
1369        u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS)
1370    ) {
1371        log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
1372    }
1373
1374    /*
1375     * compare the quick check property values for some code points
1376     * to the quick check results for checking same-code point strings
1377     */
1378    errorCode=U_ZERO_ERROR;
1379    c=0;
1380    while(c<0x110000) {
1381        length=0;
1382        U16_APPEND_UNSAFE(s, length, c);
1383
1384        qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK);
1385        qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode);
1386        if(qc1!=qc2) {
1387            log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1388        }
1389
1390        qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK);
1391        qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode);
1392        if(qc1!=qc2) {
1393            log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1394        }
1395
1396        qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK);
1397        qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode);
1398        if(qc1!=qc2) {
1399            log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1400        }
1401
1402        qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK);
1403        qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode);
1404        if(qc1!=qc2) {
1405            log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1406        }
1407
1408        length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, UPRV_LENGTHOF(nfd), &errorCode);
1409        /* length-length == 0 is used to get around a compiler warning. */
1410        U16_GET(nfd, 0, length-length, length, lead);
1411        U16_GET(nfd, 0, length-1, length, trail);
1412
1413        lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS);
1414        lccc2=u_getCombiningClass(lead);
1415        tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
1416        tccc2=u_getCombiningClass(trail);
1417
1418        if(lccc1!=lccc2) {
1419            log_data_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
1420                    lccc1, lccc2, c);
1421        }
1422        if(tccc1!=tccc2) {
1423            log_data_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
1424                    tccc1, tccc2, c);
1425        }
1426
1427        /* skip some code points */
1428        c=(20*c)/19+1;
1429    }
1430}
1431
1432static void
1433TestComposition(void) {
1434    static const struct {
1435        UNormalizationMode mode;
1436        uint32_t options;
1437        UChar input[12];
1438        UChar expect[12];
1439    } cases[]={
1440        /*
1441         * special cases for UAX #15 bug
1442         * see Unicode Corrigendum #5: Normalization Idempotency
1443         * at http://unicode.org/versions/corrigendum5.html
1444         * (was Public Review Issue #29)
1445         */
1446        { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 },         { 0x1100, 0x0300, 0x1161, 0x0327 } },
1447        { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
1448        { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 },         { 0xac00, 0x0327, 0x0300, 0x11a8 } },
1449        { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e },                 { 0x0b47, 0x0300, 0x0b3e } },
1450
1451        /* TODO: add test cases for UNORM_FCC here (j2151) */
1452    };
1453
1454    UChar output[16];
1455    UErrorCode errorCode;
1456    int32_t i, length;
1457
1458    for(i=0; i<UPRV_LENGTHOF(cases); ++i) {
1459        errorCode=U_ZERO_ERROR;
1460        length=unorm_normalize(
1461                    cases[i].input, -1,
1462                    cases[i].mode, cases[i].options,
1463                    output, UPRV_LENGTHOF(output),
1464                    &errorCode);
1465        if( U_FAILURE(errorCode) ||
1466            length!=u_strlen(cases[i].expect) ||
1467            0!=u_memcmp(output, cases[i].expect, length)
1468        ) {
1469            log_data_err("unexpected result for case %d - (Are you missing data?)\n", i);
1470        }
1471    }
1472}
1473
1474static void
1475TestGetDecomposition() {
1476    UChar decomp[32];
1477    int32_t length;
1478
1479    UErrorCode errorCode=U_ZERO_ERROR;
1480    const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, &errorCode);
1481    if(U_FAILURE(errorCode)) {
1482        log_err_status(errorCode, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode));
1483        return;
1484    }
1485
1486    length=unorm2_getDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1487    if(U_FAILURE(errorCode) || length>=0) {
1488        log_err("unorm2_getDecomposition(fcc, space) failed\n");
1489    }
1490    errorCode=U_ZERO_ERROR;
1491    length=unorm2_getDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1492    if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1493        log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n");
1494    }
1495    errorCode=U_ZERO_ERROR;
1496    length=unorm2_getDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1497    if(U_FAILURE(errorCode) || length!=3 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0x11a8 || decomp[3]!=0) {
1498        log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n");
1499    }
1500    errorCode=U_ZERO_ERROR;
1501    length=unorm2_getDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1502    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
1503        log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow failed\n");
1504    }
1505    errorCode=U_ZERO_ERROR;
1506    length=unorm2_getDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1507    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1508        log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n");
1509    }
1510    errorCode=U_ZERO_ERROR;
1511    length=unorm2_getDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1512    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1513        log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n");
1514    }
1515}
1516
1517static void
1518TestGetRawDecomposition() {
1519    UChar decomp[32];
1520    int32_t length;
1521
1522    UErrorCode errorCode=U_ZERO_ERROR;
1523    const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode);
1524    if(U_FAILURE(errorCode)) {
1525        log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1526        return;
1527    }
1528    /*
1529     * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
1530     * without recursive decomposition.
1531     */
1532
1533    length=unorm2_getRawDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1534    if(U_FAILURE(errorCode) || length>=0) {
1535        log_err("unorm2_getDecomposition(nfkc, space) failed\n");
1536    }
1537    errorCode=U_ZERO_ERROR;
1538    length=unorm2_getRawDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1539    if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1540        log_err("unorm2_getDecomposition(nfkc, a-umlaut) failed\n");
1541    }
1542    /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
1543    errorCode=U_ZERO_ERROR;
1544    length=unorm2_getRawDecomposition(n2, 0x1e08, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1545    if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xc7 || decomp[1]!=0x301 || decomp[2]!=0) {
1546        log_err("unorm2_getDecomposition(nfkc, c-cedilla-acute) failed\n");
1547    }
1548    /* U+212B ANGSTROM SIGN */
1549    errorCode=U_ZERO_ERROR;
1550    length=unorm2_getRawDecomposition(n2, 0x212b, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1551    if(U_FAILURE(errorCode) || length!=1 || decomp[0]!=0xc5 || decomp[1]!=0) {
1552        log_err("unorm2_getDecomposition(nfkc, angstrom sign) failed\n");
1553    }
1554    errorCode=U_ZERO_ERROR;
1555    length=unorm2_getRawDecomposition(n2, 0xac00, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1556    if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0) {
1557        log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC00) failed\n");
1558    }
1559    /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */
1560    errorCode=U_ZERO_ERROR;
1561    length=unorm2_getRawDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1562    if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xac00 || decomp[1]!=0x11a8 || decomp[2]!=0) {
1563        log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) failed\n");
1564    }
1565    errorCode=U_ZERO_ERROR;
1566    length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1567    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=2) {
1568        log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) overflow failed\n");
1569    }
1570    errorCode=U_ZERO_ERROR;
1571    length=unorm2_getRawDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1572    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1573        log_err("unorm2_getDecomposition(nfkc, capacity<0) failed\n");
1574    }
1575    errorCode=U_ZERO_ERROR;
1576    length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1577    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1578        log_err("unorm2_getDecomposition(nfkc, decomposition=NULL) failed\n");
1579    }
1580}
1581
1582static void
1583TestAppendRestoreMiddle() {
1584    UChar a[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 };  /* last chars are 'A' and 'cedilla' NFC */
1585    static const UChar b[]={ 0x30A, 0x64, 0x65, 0x66, 0 };  /* first char is 'ring above' NFC */
1586    /* NFC: C5 is 'A with ring above' */
1587    static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 };
1588    int32_t length;
1589    UErrorCode errorCode=U_ZERO_ERROR;
1590    const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1591    if(U_FAILURE(errorCode)) {
1592        log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1593        return;
1594    }
1595    /*
1596     * Use length=-1 to fool the estimate of the ReorderingBuffer capacity.
1597     * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A>
1598     * still fits into a[] but the full result still overflows this capacity.
1599     * (Let it modify the destination buffer before reallocating internally.)
1600     */
1601    length=unorm2_append(n2, a, -1, 6, b, -1, &errorCode);
1602    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=UPRV_LENGTHOF(expected)) {
1603        log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)length);
1604        return;
1605    }
1606    /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */
1607    if(a[0]!=0x61 || a[1]!=0x62 || a[2]!=0x63 || a[3]!=0x41 || a[4]!=0x327 || a[5]!=0) {
1608        log_err("unorm2_append(overflow) modified the first string\n");
1609        return;
1610    }
1611    errorCode=U_ZERO_ERROR;
1612    length=unorm2_append(n2, a, -1, UPRV_LENGTHOF(a), b, -1, &errorCode);
1613    if(U_FAILURE(errorCode) || length!=UPRV_LENGTHOF(expected) || 0!=u_memcmp(a, expected, length)) {
1614        log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(errorCode), (int)length);
1615        return;
1616    }
1617}
1618
1619static void
1620TestGetEasyToUseInstance() {
1621    static const UChar in[]={
1622        0xA0,  /* -> <noBreak> 0020 */
1623        0xC7, 0x301  /* = 1E08 = 0043 0327 0301 */
1624    };
1625    UChar out[32];
1626    int32_t length;
1627
1628    UErrorCode errorCode=U_ZERO_ERROR;
1629    const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1630    if(U_FAILURE(errorCode)) {
1631        log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1632        return;
1633    }
1634    length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1635    if(U_FAILURE(errorCode) || length!=2 || out[0]!=0xa0 || out[1]!=0x1e08) {
1636        log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n",
1637                (int)length, u_errorName(errorCode));
1638    }
1639
1640    errorCode=U_ZERO_ERROR;
1641    n2=unorm2_getNFDInstance(&errorCode);
1642    if(U_FAILURE(errorCode)) {
1643        log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode));
1644        return;
1645    }
1646    length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1647    if(U_FAILURE(errorCode) || length!=4 || out[0]!=0xa0 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1648        log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n",
1649                (int)length, u_errorName(errorCode));
1650    }
1651
1652    errorCode=U_ZERO_ERROR;
1653    n2=unorm2_getNFKCInstance(&errorCode);
1654    if(U_FAILURE(errorCode)) {
1655        log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1656        return;
1657    }
1658    length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1659    if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e08) {
1660        log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n",
1661                (int)length, u_errorName(errorCode));
1662    }
1663
1664    errorCode=U_ZERO_ERROR;
1665    n2=unorm2_getNFKDInstance(&errorCode);
1666    if(U_FAILURE(errorCode)) {
1667        log_err_status(errorCode, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode));
1668        return;
1669    }
1670    length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1671    if(U_FAILURE(errorCode) || length!=4 || out[0]!=0x20 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1672        log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n",
1673                (int)length, u_errorName(errorCode));
1674    }
1675
1676    errorCode=U_ZERO_ERROR;
1677    n2=unorm2_getNFKCCasefoldInstance(&errorCode);
1678    if(U_FAILURE(errorCode)) {
1679        log_err_status(errorCode, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode));
1680        return;
1681    }
1682    length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1683    if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e09) {
1684        log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n",
1685                (int)length, u_errorName(errorCode));
1686    }
1687}
1688
1689#endif /* #if !UCONFIG_NO_NORMALIZATION */
1690