1// Copyright (C) 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8/********************************************************************************
9*
10* File CNORMTST.C
11*
12* Modification History:
13*        Name                     Description
14*     Madhu Katragadda            Ported for C API
15*     synwee                      added test for quick check
16*     synwee                      added test for checkFCD
17*********************************************************************************/
18/*tests for u_normalization*/
19#include "unicode/utypes.h"
20#include "unicode/unorm.h"
21#include "unicode/utf16.h"
22#include "cintltst.h"
23#include "cmemory.h"
24
25#if !UCONFIG_NO_NORMALIZATION
26
27#include <stdlib.h>
28#include <time.h>
29#include "unicode/uchar.h"
30#include "unicode/ustring.h"
31#include "unicode/unorm.h"
32#include "cnormtst.h"
33
/* Forward declarations of the file-local tests registered by addNormTest(). */
static void TestAPI(void);
static void TestNormCoverage(void);
static void TestConcatenate(void);
static void TestNextPrevious(void);
static void TestIsNormalized(void);
static void TestFCNFKCClosure(void);
static void TestQuickCheckPerCP(void);
static void TestComposition(void);
static void TestFCD(void);
static void TestGetDecomposition(void);
static void TestGetRawDecomposition(void);
static void TestAppendRestoreMiddle(void);
static void TestGetEasyToUseInstance(void);
68
/*
 * Canonical normalization test vectors.
 *
 * Column 0 is the input, column 1 the decomposed form and column 2 the
 * composed form. TestNormCases() compares against column 2 for the composing
 * modes and against column 1 otherwise. All strings use \uXXXX escapes and
 * are converted with CharsToUChars() before use.
 */
static const char* const canonTests[][3] = {
    /* plain ASCII and a precomposed letter at the start of a word */
    { "cat", "cat", "cat" },
    { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" },

    /* D-dot_above, precomposed and as a sequence */
    { "\\u1e0a", "D\\u0307", "\\u1e0a" },
    { "D\\u0307", "D\\u0307", "\\u1e0a" },

    /* D with dot_below and dot_above in every input arrangement */
    { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_below + dot_above */
    { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D-dot_above + dot_below */
    { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" }, /* D + dot_above + dot_below */

    { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\u0307" }, /* D-cedilla + dot_above + dot_below */
    { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" }, /* D + dot_above + ogonek + dot_below */

    { "\\u1E14", "E\\u0304\\u0300", "\\u1E14" },               /* E-macron-grave */
    { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" },        /* E-macron + grave */
    { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" }, /* E-grave + macron */

    { "\\u212b", "A\\u030a", "\\u00c5" }, /* angstrom_sign */
    { "\\u00c5", "A\\u030a", "\\u00c5" }, /* A-ring */

    /* canonical modes leave the U+FB03 ligature untouched */
    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n" },

    /* canonical modes leave U+2163 untouched */
    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" },

    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },               /* ga (Katakana) */
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" },        /* ka + ten */
    { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" }, /* hw_ka + hw_ten */
    { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" }, /* ka + hw_ten */
    { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" }, /* hw_ka + ten */
    { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" }, /* A + grave + U+0316: the marks are reordered */

    /* empty strings */
    { "", "", "" }
};
105
/*
 * Compatibility normalization test vectors.
 *
 * Same layout as the canonical table: column 0 is the input, column 1 the
 * decomposed form and column 2 the composed form. All strings use \uXXXX
 * escapes and are converted with CharsToUChars() before use.
 */
static const char* const compatTests[][3] = {
    { "cat", "cat", "cat" },

    { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" }, /* Alef-Lamed vs. Alef, Lamed */

    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" }, /* ffi ligature -> f + f + i */

    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry IV", "Henry IV" },

    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },        /* ga (Katakana) */
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /* ka + ten */

    { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + ten */

    /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later. */
    { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" }, /* hw_ka + hw_ten */
    { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" }, /* ka + hw_ten */

    /* empty strings */
    { "", "", "" }
};
128
/*
 * FCD test vectors: column 0 is the input and column 1 the expected output.
 * Column 2 is NULL; TestNormCases() reads column 1 for UNORM_FCD.
 *
 * Added for testing the below-U+0300 prefix of a NUL-terminated string.
 */
static const char* const fcdTests[][3] = {
    /* D-caron + cedilla */
    { "\\u010e\\u0327", "D\\u0327\\u030c", NULL },
    /* D-caron */
    { "\\u010e", "\\u010e", NULL }
};
134
135void addNormTest(TestNode** root);
136
137void addNormTest(TestNode** root)
138{
139    addTest(root, &TestAPI, "tsnorm/cnormtst/TestAPI");
140    addTest(root, &TestDecomp, "tsnorm/cnormtst/TestDecomp");
141    addTest(root, &TestCompatDecomp, "tsnorm/cnormtst/TestCompatDecomp");
142    addTest(root, &TestCanonDecompCompose, "tsnorm/cnormtst/TestCanonDecompCompose");
143    addTest(root, &TestCompatDecompCompose, "tsnorm/cnormtst/TestCompatDecompCompose");
144    addTest(root, &TestFCD, "tsnorm/cnormtst/TestFCD");
145    addTest(root, &TestNull, "tsnorm/cnormtst/TestNull");
146    addTest(root, &TestQuickCheck, "tsnorm/cnormtst/TestQuickCheck");
147    addTest(root, &TestQuickCheckPerCP, "tsnorm/cnormtst/TestQuickCheckPerCP");
148    addTest(root, &TestIsNormalized, "tsnorm/cnormtst/TestIsNormalized");
149    addTest(root, &TestCheckFCD, "tsnorm/cnormtst/TestCheckFCD");
150    addTest(root, &TestNormCoverage, "tsnorm/cnormtst/TestNormCoverage");
151    addTest(root, &TestConcatenate, "tsnorm/cnormtst/TestConcatenate");
152    addTest(root, &TestNextPrevious, "tsnorm/cnormtst/TestNextPrevious");
153    addTest(root, &TestFCNFKCClosure, "tsnorm/cnormtst/TestFCNFKCClosure");
154    addTest(root, &TestComposition, "tsnorm/cnormtst/TestComposition");
155    addTest(root, &TestGetDecomposition, "tsnorm/cnormtst/TestGetDecomposition");
156    addTest(root, &TestGetRawDecomposition, "tsnorm/cnormtst/TestGetRawDecomposition");
157    addTest(root, &TestAppendRestoreMiddle, "tsnorm/cnormtst/TestAppendRestoreMiddle");
158    addTest(root, &TestGetEasyToUseInstance, "tsnorm/cnormtst/TestGetEasyToUseInstance");
159}
160
/*
 * Printable names for log messages, indexed directly by UNormalizationMode
 * (see modeStrings[mode] in TestNormCases()).
 *
 * The enum starts at UNORM_NONE == 1, so index 0 needs a placeholder.
 * Without it every label is shifted by one (UNORM_NFD would print as
 * "UNORM_NFKD", and so on).
 */
static const char* const modeStrings[]={
    "?",                /* 0: not a valid mode */
    "UNORM_NONE",       /* 1 */
    "UNORM_NFD",        /* 2 */
    "UNORM_NFKD",       /* 3 */
    "UNORM_NFC",        /* 4 */
    "UNORM_NFKC",       /* 5 */
    "UNORM_FCD",        /* 6 */
    "UNORM_MODE_COUNT"  /* 7 */
};
170
171static void TestNormCases(UNormalizationMode mode,
172                          const char* const cases[][3], int32_t lengthOfCases) {
173    int32_t x, neededLen, length2;
174    int32_t expIndex= (mode==UNORM_NFC || mode==UNORM_NFKC) ? 2 : 1;
175    UChar *source=NULL;
176    UChar result[16];
177    log_verbose("Testing unorm_normalize(%s)\n", modeStrings[mode]);
178    for(x=0; x < lengthOfCases; x++)
179    {
180        UErrorCode status = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
181        source=CharsToUChars(cases[x][0]);
182        neededLen= unorm_normalize(source, u_strlen(source), mode, 0, NULL, 0, &status);
183        length2= unorm_normalize(source, -1, mode, 0, NULL, 0, &status2);
184        if(neededLen!=length2) {
185          log_err("ERROR in unorm_normalize(%s)[%d]: "
186                  "preflight length/NUL %d!=%d preflight length/srcLength\n",
187                  modeStrings[mode], (int)x, (int)neededLen, (int)length2);
188        }
189        if(status==U_BUFFER_OVERFLOW_ERROR)
190        {
191            status=U_ZERO_ERROR;
192        }
193        length2=unorm_normalize(source, u_strlen(source), mode, 0, result, UPRV_LENGTHOF(result), &status);
194        if(U_FAILURE(status) || neededLen!=length2) {
195            log_data_err("ERROR in unorm_normalize(%s/NUL) at %s:  %s - (Are you missing data?)\n",
196                         modeStrings[mode], austrdup(source), myErrorName(status));
197        } else {
198            assertEqual(result, cases[x][expIndex], x);
199        }
200        length2=unorm_normalize(source, -1, mode, 0, result, UPRV_LENGTHOF(result), &status);
201        if(U_FAILURE(status) || neededLen!=length2) {
202            log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s:  %s - (Are you missing data?)\n",
203                         modeStrings[mode], austrdup(source), myErrorName(status));
204        } else {
205            assertEqual(result, cases[x][expIndex], x);
206        }
207        free(source);
208    }
209}
210
211void TestDecomp() {
212    TestNormCases(UNORM_NFD, canonTests, UPRV_LENGTHOF(canonTests));
213}
214
215void TestCompatDecomp() {
216    TestNormCases(UNORM_NFKD, compatTests, UPRV_LENGTHOF(compatTests));
217}
218
219void TestCanonDecompCompose() {
220    TestNormCases(UNORM_NFC, canonTests, UPRV_LENGTHOF(canonTests));
221}
222
223void TestCompatDecompCompose() {
224    TestNormCases(UNORM_NFKC, compatTests, UPRV_LENGTHOF(compatTests));
225}
226
227void TestFCD() {
228    TestNormCases(UNORM_FCD, fcdTests, UPRV_LENGTHOF(fcdTests));
229}
230
231static void assertEqual(const UChar* result, const char* expected, int32_t index)
232{
233    UChar *expectedUni = CharsToUChars(expected);
234    if(u_strcmp(result, expectedUni)!=0){
235        log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index, expected,
236            austrdup(result) );
237    }
238    free(expectedUni);
239}
240
241static void TestNull_check(UChar *src, int32_t srcLen,
242                    UChar *exp, int32_t expLen,
243                    UNormalizationMode mode,
244                    const char *name)
245{
246    UErrorCode status = U_ZERO_ERROR;
247    int32_t len, i;
248
249    UChar   result[50];
250
251
252    status = U_ZERO_ERROR;
253
254    for(i=0;i<50;i++)
255      {
256        result[i] = 0xFFFD;
257      }
258
259    len = unorm_normalize(src, srcLen, mode, 0, result, 50, &status);
260
261    if(U_FAILURE(status)) {
262      log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name, u_errorName(status));
263    } else if (len != expLen) {
264      log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name, expLen, len);
265    }
266
267    {
268      for(i=0;i<len;i++){
269        if(exp[i] != result[i]) {
270          log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
271                  name,
272                  i,
273                  exp[i],
274                  result[i]);
275          return;
276        }
277        log_verbose("     %d: \\u%04X\n", i, result[i]);
278      }
279    }
280
281    log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name);
282}
283
284void TestNull()
285{
286
287    UChar   source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
288    int32_t source_comp_len = 4;
289    UChar   expect_comp[] = { 0x0061, 0x0000, 0x1e0a };
290    int32_t expect_comp_len = 3;
291
292    UChar   source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 };
293    int32_t source_dcmp_len = 3;
294    UChar   expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
295    int32_t expect_dcmp_len = 5;
296
297    TestNull_check(source_comp,
298                   source_comp_len,
299                   expect_comp,
300                   expect_comp_len,
301                   UNORM_NFC,
302                   "UNORM_NFC");
303
304    TestNull_check(source_dcmp,
305                   source_dcmp_len,
306                   expect_dcmp,
307                   expect_dcmp_len,
308                   UNORM_NFD,
309                   "UNORM_NFD");
310
311    TestNull_check(source_comp,
312                   source_comp_len,
313                   expect_comp,
314                   expect_comp_len,
315                   UNORM_NFKC,
316                   "UNORM_NFKC");
317
318
319}
320
321static void TestQuickCheckResultNO()
322{
323  const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
324                         0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
325  const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
326                          0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
327  const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
328                           0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
329  const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
330                           0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
331
332
333  const int SIZE = 10;
334
335  int count = 0;
336  UErrorCode error = U_ZERO_ERROR;
337
338  for (; count < SIZE; count ++)
339  {
340    if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
341                                                              UNORM_NO)
342    {
343      log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
344      return;
345    }
346    if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
347                                                              UNORM_NO)
348    {
349      log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
350      return;
351    }
352    if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
353                                                              UNORM_NO)
354    {
355      log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
356      return;
357    }
358    if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
359                                                              UNORM_NO)
360    {
361      log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
362      return;
363    }
364  }
365}
366
367
368static void TestQuickCheckResultYES()
369{
370  const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
371                         0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
372  const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
373                         0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
374  const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
375                          0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
376  const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
377                          0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
378
379  const int SIZE = 10;
380  int count = 0;
381  UErrorCode error = U_ZERO_ERROR;
382
383  UChar cp = 0;
384  while (cp < 0xA0)
385  {
386    if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES)
387    {
388      log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp);
389      return;
390    }
391    if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) !=
392                                                             UNORM_YES)
393    {
394      log_err("ERROR in NFC quick check at U+%04x\n", cp);
395      return;
396    }
397    if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES)
398    {
399      log_data_err("ERROR in NFKD quick check at U+%04x\n", cp);
400      return;
401    }
402    if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) !=
403                                                             UNORM_YES)
404    {
405      log_err("ERROR in NFKC quick check at U+%04x\n", cp);
406      return;
407    }
408    cp ++;
409  }
410
411  for (; count < SIZE; count ++)
412  {
413    if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
414                                                             UNORM_YES)
415    {
416      log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
417      return;
418    }
419    if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error)
420                                                          != UNORM_YES)
421    {
422      log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
423      return;
424    }
425    if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
426                                                             UNORM_YES)
427    {
428      log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
429      return;
430    }
431    if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
432                                                             UNORM_YES)
433    {
434      log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
435      return;
436    }
437  }
438}
439
440static void TestQuickCheckResultMAYBE()
441{
442  const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
443                         0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
444  const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
445                          0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
446
447
448  const int SIZE = 10;
449
450  int count = 0;
451  UErrorCode error = U_ZERO_ERROR;
452
453  /* NFD and NFKD does not have any MAYBE codepoints */
454  for (; count < SIZE; count ++)
455  {
456    if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
457                                                           UNORM_MAYBE)
458    {
459      log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC[count]);
460      return;
461    }
462    if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
463                                                           UNORM_MAYBE)
464    {
465      log_data_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
466      return;
467    }
468  }
469}
470
471static void TestQuickCheckStringResult()
472{
473  int count;
474  UChar *d = NULL;
475  UChar *c = NULL;
476  UErrorCode error = U_ZERO_ERROR;
477
478  for (count = 0; count < UPRV_LENGTHOF(canonTests); count ++)
479  {
480    d = CharsToUChars(canonTests[count][1]);
481    c = CharsToUChars(canonTests[count][2]);
482    if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) !=
483                                                            UNORM_YES)
484    {
485      log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count);
486      return;
487    }
488
489    if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) ==
490                                                            UNORM_NO)
491    {
492      log_err("ERROR in NFC quick check for string at count %d\n", count);
493      return;
494    }
495
496    free(d);
497    free(c);
498  }
499
500  for (count = 0; count < UPRV_LENGTHOF(compatTests); count ++)
501  {
502    d = CharsToUChars(compatTests[count][1]);
503    c = CharsToUChars(compatTests[count][2]);
504    if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) !=
505                                                            UNORM_YES)
506    {
507      log_data_err("ERROR in NFKD quick check for string at count %d\n", count);
508      return;
509    }
510
511    if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) !=
512                                                            UNORM_YES)
513    {
514      log_err("ERROR in NFKC quick check for string at count %d\n", count);
515      return;
516    }
517
518    free(d);
519    free(c);
520  }
521}
522
/*
 * Entry point for the unorm_quickCheck() tests: the single-code-point checks
 * for each possible answer (NO, YES, MAYBE), then the whole-string checks.
 */
void TestQuickCheck(void)
{
  /* single code points */
  TestQuickCheckResultNO();
  TestQuickCheckResultYES();
  TestQuickCheckResultMAYBE();

  /* complete strings from the normalization tables */
  TestQuickCheckStringResult();
}
530
531/*
532 * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
533 * normalized, and some that are not.
534 * Here we pick some specific cases and test the C API.
535 */
536static void TestIsNormalized(void) {
537    static const UChar notNFC[][8]={            /* strings that are not in NFC */
538        { 0x62, 0x61, 0x300, 0x63, 0 },         /* 0061 0300 compose */
539        { 0xfb1d, 0 },                          /* excluded from composition */
540        { 0x0627, 0x0653, 0 },                  /* 0627 0653 compose */
541        { 0x3071, 0x306f, 0x309a, 0x3073, 0 }   /* 306F 309A compose */
542    };
543    static const UChar notNFKC[][8]={           /* strings that are not in NFKC */
544        { 0x1100, 0x1161, 0 },                  /* Jamo compose */
545        { 0x1100, 0x314f, 0 },                  /* compatibility Jamo compose */
546        { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 }   /* 1F00 0345 compose */
547    };
548
549    int32_t i;
550    UErrorCode errorCode;
551
552    /* API test */
553
554    /* normal case with length>=0 (length -1 used for special cases below) */
555    errorCode=U_ZERO_ERROR;
556    if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
557        log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode));
558    }
559
560    /* incoming U_FAILURE */
561    errorCode=U_TRUNCATED_CHAR_FOUND;
562    (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode);
563    if(errorCode!=U_TRUNCATED_CHAR_FOUND) {
564        log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode));
565    }
566
567    /* NULL source */
568    errorCode=U_ZERO_ERROR;
569    (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode);
570    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
571        log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
572    }
573
574    /* bad length */
575    errorCode=U_ZERO_ERROR;
576    (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode);
577    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
578        log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
579    }
580
581    /* specific cases */
582    for(i=0; i<UPRV_LENGTHOF(notNFC); ++i) {
583        errorCode=U_ZERO_ERROR;
584        if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
585            log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
586        }
587        errorCode=U_ZERO_ERROR;
588        if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
589            log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
590        }
591    }
592    for(i=0; i<UPRV_LENGTHOF(notNFKC); ++i) {
593        errorCode=U_ZERO_ERROR;
594        if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
595            log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
596        }
597    }
598}
599
600void TestCheckFCD()
601{
602  UErrorCode status = U_ZERO_ERROR;
603  static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
604                         0x0A};
605  static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
606                          0x02B9, 0x0314, 0x0315, 0x0316};
607  static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
608                         0x0050, 0x0730, 0x09EE, 0x1E10};
609
610  static const UChar datastr[][5] =
611  { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
612    {0x0061, 0x030A, 0x00E2, 0x0323, 0},
613    {0x0061, 0x0323, 0x00E2, 0x0323, 0},
614    {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
615  static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES};
616
617  static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
618                            0x6a,
619                            0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
620                            0xea,
621                            0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
622                            0x0307, 0x0308, 0x0309, 0x030a,
623                            0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
624                            0x0327, 0x0328, 0x0329, 0x032a,
625                            0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
626                            0x1e07, 0x1e08, 0x1e09, 0x1e0a};
627
628  int count = 0;
629
630  if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES)
631    log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
632  if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO)
633    log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
634  if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES)
635    log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
636
637  if (U_FAILURE(status))
638    log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status));
639
640  while (count < 4)
641  {
642    UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status);
643    if (U_FAILURE(status)) {
644      log_data_err("unorm_quickCheck(FCD) failed: exception occured at data set %d - (Are you missing data?)\n", count);
645      break;
646    }
647    else {
648      if (result[count] != fcdresult) {
649        log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count,
650                 result[count]);
651      }
652    }
653    count ++;
654  }
655
656  /* random checks of long strings */
657  status = U_ZERO_ERROR;
658  srand((unsigned)time( NULL ));
659
660  for (count = 0; count < 50; count ++)
661  {
662    int size = 0;
663    UBool testresult = UNORM_YES;
664    UChar data[20];
665    UChar norm[100];
666    UChar nfd[100];
667    int normsize = 0;
668    int nfdsize = 0;
669
670    while (size != 19) {
671      data[size] = datachar[rand() % UPRV_LENGTHOF(datachar)];
672      log_verbose("0x%x", data[size]);
673      normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0,
674                                  norm + normsize, 100 - normsize, &status);
675      if (U_FAILURE(status)) {
676        log_data_err("unorm_quickCheck(FCD) failed: exception occured at data generation - (Are you missing data?)\n");
677        break;
678      }
679      size ++;
680    }
681    log_verbose("\n");
682
683    nfdsize = unorm_normalize(data, size, UNORM_NFD, 0,
684                              nfd, 100, &status);
685    if (U_FAILURE(status)) {
686      log_data_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation - (Are you missing data?)\n");
687    }
688
689    if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) {
690      testresult = UNORM_NO;
691    }
692    if (testresult == UNORM_YES) {
693      log_verbose("result UNORM_YES\n");
694    }
695    else {
696      log_verbose("result UNORM_NO\n");
697    }
698
699    if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAILURE(status)) {
700      log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult);
701    }
702  }
703}
704
705static void
706TestAPI() {
707    static const UChar in[]={ 0x68, 0xe4 };
708    UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
709    UErrorCode errorCode;
710    int32_t length;
711
712    /* try preflighting */
713    errorCode=U_ZERO_ERROR;
714    length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode);
715    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
716        log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
717        return;
718    }
719
720    errorCode=U_ZERO_ERROR;
721    length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode);
722    if(U_FAILURE(errorCode)) {
723        log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName(errorCode));
724        return;
725    }
726    if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) {
727        log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]);
728        return;
729    }
730    length=unorm_normalize(NULL, 0, UNORM_NFC, 0, NULL, 0, &errorCode);
731    if(U_FAILURE(errorCode)) {
732        log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
733        return;
734    }
735    length=unorm_normalize(NULL, 0, UNORM_NFC, 0, out, 20, &errorCode);
736    if(U_FAILURE(errorCode)) {
737        log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
738        return;
739    }
740}
741
/* Code points used by the test cases that improve test code coverage. */
enum {
    /* conjoining Jamo */
    HANGUL_KIYEOK=0x1100,           /* Jamo L U+1100 */
    HANGUL_WEO=0x116f,              /* Jamo V U+116f */
    HANGUL_KIYEOK_SIOS=0x11aa,      /* Jamo T U+11aa */

    /* compatibility Jamo */
    HANGUL_K_KIYEOK=0x3131,         /* NFKD->Jamo L U+1100 */
    HANGUL_K_WEO=0x315d,            /* NFKD->Jamo V U+116f */
    HANGUL_K_KIYEOK_SIOS=0x3133,    /* NFKD->Jamo T U+11aa */

    /* precomposed Hangul syllables */
    HANGUL_AC00=0xac00,                     /* Hangul syllable = Jamo LV U+ac00 */
    HANGUL_SYLLABLE=HANGUL_AC00+14*28+3,    /* Hangul syllable = U+1100 * U+116f * U+11aa */

    /* supplementary-plane musical symbols */
    MUSICAL_VOID_NOTEHEAD=0x1d157,
    MUSICAL_HALF_NOTE=0x1d15e,      /* NFC/NFD->Notehead+Stem */
    MUSICAL_STEM=0x1d165,           /* cc=216 */
    MUSICAL_STACCATO=0x1d17c        /* cc=220 */
};
760
761static void
762TestNormCoverage() {
763    UChar input[1000], expect[1000], output[1000];
764    UErrorCode errorCode;
765    int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLength;
766
767    /* create a long and nasty string with NFKC-unsafe characters */
768    inLength=0;
769
770    /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
771    input[inLength++]=HANGUL_KIYEOK;
772    input[inLength++]=HANGUL_WEO;
773    input[inLength++]=HANGUL_KIYEOK_SIOS;
774
775    input[inLength++]=HANGUL_KIYEOK;
776    input[inLength++]=HANGUL_WEO;
777    input[inLength++]=HANGUL_K_KIYEOK_SIOS;
778
779    input[inLength++]=HANGUL_KIYEOK;
780    input[inLength++]=HANGUL_K_WEO;
781    input[inLength++]=HANGUL_KIYEOK_SIOS;
782
783    input[inLength++]=HANGUL_KIYEOK;
784    input[inLength++]=HANGUL_K_WEO;
785    input[inLength++]=HANGUL_K_KIYEOK_SIOS;
786
787    input[inLength++]=HANGUL_K_KIYEOK;
788    input[inLength++]=HANGUL_WEO;
789    input[inLength++]=HANGUL_KIYEOK_SIOS;
790
791    input[inLength++]=HANGUL_K_KIYEOK;
792    input[inLength++]=HANGUL_WEO;
793    input[inLength++]=HANGUL_K_KIYEOK_SIOS;
794
795    input[inLength++]=HANGUL_K_KIYEOK;
796    input[inLength++]=HANGUL_K_WEO;
797    input[inLength++]=HANGUL_KIYEOK_SIOS;
798
799    input[inLength++]=HANGUL_K_KIYEOK;
800    input[inLength++]=HANGUL_K_WEO;
801    input[inLength++]=HANGUL_K_KIYEOK_SIOS;
802
803    /* Hangul LV with normal/compatibility Jamo T */
804    input[inLength++]=HANGUL_AC00;
805    input[inLength++]=HANGUL_KIYEOK_SIOS;
806
807    input[inLength++]=HANGUL_AC00;
808    input[inLength++]=HANGUL_K_KIYEOK_SIOS;
809
810    /* compatibility Jamo L, V */
811    input[inLength++]=HANGUL_K_KIYEOK;
812    input[inLength++]=HANGUL_K_WEO;
813
814    hangulPrefixLength=inLength;
815
816    input[inLength++]=U16_LEAD(MUSICAL_HALF_NOTE);
817    input[inLength++]=U16_TRAIL(MUSICAL_HALF_NOTE);
818    for(i=0; i<200; ++i) {
819        input[inLength++]=U16_LEAD(MUSICAL_STACCATO);
820        input[inLength++]=U16_TRAIL(MUSICAL_STACCATO);
821        input[inLength++]=U16_LEAD(MUSICAL_STEM);
822        input[inLength++]=U16_TRAIL(MUSICAL_STEM);
823    }
824
825    /* (compatibility) Jamo L, T do not compose */
826    input[inLength++]=HANGUL_K_KIYEOK;
827    input[inLength++]=HANGUL_K_KIYEOK_SIOS;
828
829    /* quick checks */
830    errorCode=U_ZERO_ERROR;
831    if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_FAILURE(errorCode)) {
832        log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
833    }
834    errorCode=U_ZERO_ERROR;
835    if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_FAILURE(errorCode)) {
836        log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
837    }
838    errorCode=U_ZERO_ERROR;
839    if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
840        log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
841    }
842    errorCode=U_ZERO_ERROR;
843    if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
844        log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
845    }
846    errorCode=U_ZERO_ERROR;
847    if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_FAILURE(errorCode)) {
848        log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
849    }
850
851    /* NFKC */
852    expectLength=0;
853    expect[expectLength++]=HANGUL_SYLLABLE;
854
855    expect[expectLength++]=HANGUL_SYLLABLE;
856
857    expect[expectLength++]=HANGUL_SYLLABLE;
858
859    expect[expectLength++]=HANGUL_SYLLABLE;
860
861    expect[expectLength++]=HANGUL_SYLLABLE;
862
863    expect[expectLength++]=HANGUL_SYLLABLE;
864
865    expect[expectLength++]=HANGUL_SYLLABLE;
866
867    expect[expectLength++]=HANGUL_SYLLABLE;
868
869    expect[expectLength++]=HANGUL_AC00+3;
870
871    expect[expectLength++]=HANGUL_AC00+3;
872
873    expect[expectLength++]=HANGUL_AC00+14*28;
874
875    expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
876    expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
877    expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
878    expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
879    for(i=0; i<200; ++i) {
880        expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
881        expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
882    }
883    for(i=0; i<200; ++i) {
884        expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
885        expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
886    }
887
888    expect[expectLength++]=HANGUL_KIYEOK;
889    expect[expectLength++]=HANGUL_KIYEOK_SIOS;
890
891    /* try destination overflow first */
892    errorCode=U_ZERO_ERROR;
893    preflightLength=unorm_normalize(input, inLength,
894                           UNORM_NFKC, 0,
895                           output, 100, /* too short */
896                           &errorCode);
897    if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
898        log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode));
899    }
900
901    /* real NFKC */
902    errorCode=U_ZERO_ERROR;
903    length=unorm_normalize(input, inLength,
904                           UNORM_NFKC, 0,
905                           output, UPRV_LENGTHOF(output),
906                           &errorCode);
907    if(U_FAILURE(errorCode)) {
908        log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
909    } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
910        log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
911        for(i=0; i<length; ++i) {
912            if(output[i]!=expect[i]) {
913                log_err("    NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
914                break;
915            }
916        }
917    }
918    if(length!=preflightLength) {
919        log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length, preflightLength);
920    }
921
922    /* FCD */
923    u_memcpy(expect, input, hangulPrefixLength);
924    expectLength=hangulPrefixLength;
925
926    expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
927    expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
928    expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
929    expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
930    for(i=0; i<200; ++i) {
931        expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
932        expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
933    }
934    for(i=0; i<200; ++i) {
935        expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
936        expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
937    }
938
939    expect[expectLength++]=HANGUL_K_KIYEOK;
940    expect[expectLength++]=HANGUL_K_KIYEOK_SIOS;
941
942    errorCode=U_ZERO_ERROR;
943    length=unorm_normalize(input, inLength,
944                           UNORM_FCD, 0,
945                           output, UPRV_LENGTHOF(output),
946                           &errorCode);
947    if(U_FAILURE(errorCode)) {
948        log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
949    } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
950        log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
951        for(i=0; i<length; ++i) {
952            if(output[i]!=expect[i]) {
953                log_err("    FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
954                break;
955            }
956        }
957    }
958}
959
960/* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
961static void
962TestConcatenate(void) {
963    /* "re + 'sume'" */
964    static const UChar
965    left[]={
966        0x72, 0x65, 0
967    },
968    right[]={
969        0x301, 0x73, 0x75, 0x6d, 0xe9, 0
970    },
971    expect[]={
972        0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
973    };
974
975    UChar buffer[100];
976    UErrorCode errorCode;
977    int32_t length;
978
979    /* left with length, right NUL-terminated */
980    errorCode=U_ZERO_ERROR;
981    length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
982    if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length)) {
983        log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
984    }
985
986    /* preflighting */
987    errorCode=U_ZERO_ERROR;
988    length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCode);
989    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) {
990        log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
991    }
992
993    buffer[2]=0x5555;
994    errorCode=U_ZERO_ERROR;
995    length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &errorCode);
996    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) {
997        log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
998    }
999
1000    /* enter with U_FAILURE */
1001    buffer[2]=0xaaaa;
1002    errorCode=U_UNEXPECTED_TOKEN;
1003    length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1004    if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) {
1005        log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length, u_errorName(errorCode));
1006    }
1007
1008    /* illegal arguments */
1009    buffer[2]=0xaaaa;
1010    errorCode=U_ZERO_ERROR;
1011    length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1012    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) {
1013        log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1014    }
1015
1016    errorCode=U_ZERO_ERROR;
1017    length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &errorCode);
1018    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1019        log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1020    }
1021}
1022
/* Marker placed between the expected output pieces in the iteration test tables. */
enum {
    _PLUS = 0x2b    /* U+002B PLUS SIGN */
};
1026
1027static const char *const _modeString[UNORM_MODE_COUNT]={
1028    "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
1029};
1030
1031static void
1032_testIter(const UChar *src, int32_t srcLength,
1033          UCharIterator *iter, UNormalizationMode mode, UBool forward,
1034          const UChar *out, int32_t outLength,
1035          const int32_t *srcIndexes, int32_t srcIndexesLength) {
1036    UChar buffer[4];
1037    const UChar *expect, *outLimit, *in;
1038    int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength;
1039    UErrorCode errorCode;
1040    UBool neededToNormalize, expectNeeded;
1041
1042    errorCode=U_ZERO_ERROR;
1043    outLimit=out+outLength;
1044    if(forward) {
1045        expect=out;
1046        i=index=0;
1047    } else {
1048        expect=outLimit;
1049        i=srcIndexesLength-2;
1050        index=srcLength;
1051    }
1052
1053    for(;;) {
1054        prevIndex=index;
1055        if(forward) {
1056            if(!iter->hasNext(iter)) {
1057                return;
1058            }
1059            length=unorm_next(iter,
1060                              buffer, UPRV_LENGTHOF(buffer),
1061                              mode, 0,
1062                              (UBool)(out!=NULL), &neededToNormalize,
1063                              &errorCode);
1064            expectIndex=srcIndexes[i+1];
1065            in=src+prevIndex;
1066            inLength=expectIndex-prevIndex;
1067
1068            if(out!=NULL) {
1069                /* get output piece from between plus signs */
1070                expectLength=0;
1071                while((expect+expectLength)!=outLimit && expect[expectLength]!=_PLUS) {
1072                    ++expectLength;
1073                }
1074                expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1075            } else {
1076                expect=in;
1077                expectLength=inLength;
1078                expectNeeded=FALSE;
1079            }
1080        } else {
1081            if(!iter->hasPrevious(iter)) {
1082                return;
1083            }
1084            length=unorm_previous(iter,
1085                                  buffer, UPRV_LENGTHOF(buffer),
1086                                  mode, 0,
1087                                  (UBool)(out!=NULL), &neededToNormalize,
1088                                  &errorCode);
1089            expectIndex=srcIndexes[i];
1090            in=src+expectIndex;
1091            inLength=prevIndex-expectIndex;
1092
1093            if(out!=NULL) {
1094                /* get output piece from between plus signs */
1095                expectLength=0;
1096                while(expect!=out && expect[-1]!=_PLUS) {
1097                    ++expectLength;
1098                    --expect;
1099                }
1100                expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1101            } else {
1102                expect=in;
1103                expectLength=inLength;
1104                expectNeeded=FALSE;
1105            }
1106        }
1107        index=iter->getIndex(iter, UITER_CURRENT);
1108
1109        if(U_FAILURE(errorCode)) {
1110            log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n",
1111                    forward, _modeString[mode], i, u_errorName(errorCode));
1112            return;
1113        }
1114        if(expectIndex!=index) {
1115            log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
1116                    forward, _modeString[mode], i, index, expectIndex);
1117            return;
1118        }
1119        if(expectLength!=length) {
1120            log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
1121                    forward, _modeString[mode], i, length, expectLength);
1122            return;
1123        }
1124        if(0!=u_memcmp(expect, buffer, length)) {
1125            log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
1126                    forward, _modeString[mode], i);
1127            return;
1128        }
1129        if(neededToNormalize!=expectNeeded) {
1130        }
1131
1132        if(forward) {
1133            expect+=expectLength+1; /* go after the + */
1134            ++i;
1135        } else {
1136            --expect; /* go before the + */
1137            --i;
1138        }
1139    }
1140}
1141
1142static void
1143TestNextPrevious() {
1144    static const UChar
1145    src[]={ /* input string */
1146        0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1147    },
1148    nfd[]={ /* + separates expected output pieces */
1149        0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x3133
1150    },
1151    nfkd[]={
1152        0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x11aa
1153    },
1154    nfc[]={
1155        0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1156    },
1157    nfkc[]={
1158        0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03
1159    },
1160    fcd[]={
1161        0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1162    };
1163
1164    /* expected iterator indexes in the source string for each iteration piece */
1165    static const int32_t
1166    nfdIndexes[]={
1167        0, 1, 2, 5, 6, 7
1168    },
1169    nfkdIndexes[]={
1170        0, 1, 2, 5, 6, 7
1171    },
1172    nfcIndexes[]={
1173        0, 1, 2, 5, 6, 7
1174    },
1175    nfkcIndexes[]={
1176        0, 1, 2, 5, 7
1177    },
1178    fcdIndexes[]={
1179        0, 1, 2, 5, 6, 7
1180    };
1181
1182    UCharIterator iter;
1183
1184    UChar buffer[4];
1185    int32_t length;
1186
1187    UBool neededToNormalize;
1188    UErrorCode errorCode;
1189
1190    uiter_setString(&iter, src, UPRV_LENGTHOF(src));
1191
1192    /* test iteration with doNormalize */
1193    iter.index=0;
1194    _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, TRUE, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4);
1195    iter.index=0;
1196    _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, TRUE, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4);
1197    iter.index=0;
1198    _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, TRUE, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4);
1199    iter.index=0;
1200    _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, TRUE, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4);
1201    iter.index=0;
1202    _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, TRUE, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4);
1203
1204    iter.index=iter.length;
1205    _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, FALSE, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4);
1206    iter.index=iter.length;
1207    _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, FALSE, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4);
1208    iter.index=iter.length;
1209    _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, FALSE, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4);
1210    iter.index=iter.length;
1211    _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, FALSE, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4);
1212    iter.index=iter.length;
1213    _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, FALSE, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4);
1214
1215    /* test iteration without doNormalize */
1216    iter.index=0;
1217    _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, TRUE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1218    iter.index=0;
1219    _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, TRUE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1220    iter.index=0;
1221    _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, TRUE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1222    iter.index=0;
1223    _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, TRUE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1224    iter.index=0;
1225    _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, TRUE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1226
1227    iter.index=iter.length;
1228    _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, FALSE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1229    iter.index=iter.length;
1230    _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, FALSE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1231    iter.index=iter.length;
1232    _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, FALSE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1233    iter.index=iter.length;
1234    _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, FALSE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1235    iter.index=iter.length;
1236    _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, FALSE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1237
1238    /* try without neededToNormalize */
1239    errorCode=U_ZERO_ERROR;
1240    buffer[0]=5;
1241    iter.index=1;
1242    length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1243                      UNORM_NFD, 0, TRUE, NULL,
1244                      &errorCode);
1245    if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[3]) {
1246        log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode));
1247        return;
1248    }
1249
1250    /* preflight */
1251    neededToNormalize=9;
1252    iter.index=1;
1253    length=unorm_next(&iter, NULL, 0,
1254                      UNORM_NFD, 0, TRUE, &neededToNormalize,
1255                      &errorCode);
1256    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2) {
1257        log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode));
1258        return;
1259    }
1260
1261    errorCode=U_ZERO_ERROR;
1262    buffer[0]=buffer[1]=5;
1263    neededToNormalize=9;
1264    iter.index=1;
1265    length=unorm_next(&iter, buffer, 1,
1266                      UNORM_NFD, 0, TRUE, &neededToNormalize,
1267                      &errorCode);
1268    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2 || buffer[1]!=5) {
1269        log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode));
1270        return;
1271    }
1272
1273    /* no iterator */
1274    errorCode=U_ZERO_ERROR;
1275    buffer[0]=buffer[1]=5;
1276    neededToNormalize=9;
1277    iter.index=1;
1278    length=unorm_next(NULL, buffer, UPRV_LENGTHOF(buffer),
1279                      UNORM_NFD, 0, TRUE, &neededToNormalize,
1280                      &errorCode);
1281    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1282        log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode));
1283        return;
1284    }
1285
1286    /* illegal mode */
1287    buffer[0]=buffer[1]=5;
1288    neededToNormalize=9;
1289    iter.index=1;
1290    length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1291                      (UNormalizationMode)0, 0, TRUE, &neededToNormalize,
1292                      &errorCode);
1293    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1294        log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode));
1295        return;
1296    }
1297
1298    /* error coming in */
1299    errorCode=U_MISPLACED_QUANTIFIER;
1300    buffer[0]=5;
1301    iter.index=1;
1302    length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1303                      UNORM_NFD, 0, TRUE, NULL,
1304                      &errorCode);
1305    if(errorCode!=U_MISPLACED_QUANTIFIER) {
1306        log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode));
1307        return;
1308    }
1309}
1310
1311static void
1312TestFCNFKCClosure(void) {
1313    static const struct {
1314        UChar32 c;
1315        const UChar s[6];
1316    } tests[]={
1317        { 0x00C4, { 0 } },
1318        { 0x00E4, { 0 } },
1319        { 0x037A, { 0x0020, 0x03B9, 0 } },
1320        { 0x03D2, { 0x03C5, 0 } },
1321        { 0x20A8, { 0x0072, 0x0073, 0 } },
1322        { 0x210B, { 0x0068, 0 } },
1323        { 0x210C, { 0x0068, 0 } },
1324        { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1325        { 0x2122, { 0x0074, 0x006D, 0 } },
1326        { 0x2128, { 0x007A, 0 } },
1327        { 0x1D5DB, { 0x0068, 0 } },
1328        { 0x1D5ED, { 0x007A, 0 } },
1329        { 0x0061, { 0 } }
1330    };
1331
1332    UChar buffer[8];
1333    UErrorCode errorCode;
1334    int32_t i, length;
1335
1336    for(i=0; i<UPRV_LENGTHOF(tests); ++i) {
1337        errorCode=U_ZERO_ERROR;
1338        length=u_getFC_NFKC_Closure(tests[i].c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1339        if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests[i].s, buffer)) {
1340            log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests[i].c, u_errorName(errorCode));
1341        }
1342    }
1343
1344    /* error handling */
1345    errorCode=U_ZERO_ERROR;
1346    length=u_getFC_NFKC_Closure(0x5c, NULL, UPRV_LENGTHOF(buffer), &errorCode);
1347    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1348        log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode));
1349    }
1350
1351    length=u_getFC_NFKC_Closure(0x5c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1352    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1353        log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode));
1354    }
1355}
1356
1357static void
1358TestQuickCheckPerCP() {
1359    UErrorCode errorCode;
1360    UChar32 c, lead, trail;
1361    UChar s[U16_MAX_LENGTH], nfd[16];
1362    int32_t length, lccc1, lccc2, tccc1, tccc2;
1363    int32_t qc1, qc2;
1364
1365    if(
1366        u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1367        u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1368        u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1369        u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1370        u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) ||
1371        u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS)
1372    ) {
1373        log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
1374    }
1375
1376    /*
1377     * compare the quick check property values for some code points
1378     * to the quick check results for checking same-code point strings
1379     */
1380    errorCode=U_ZERO_ERROR;
1381    c=0;
1382    while(c<0x110000) {
1383        length=0;
1384        U16_APPEND_UNSAFE(s, length, c);
1385
1386        qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK);
1387        qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode);
1388        if(qc1!=qc2) {
1389            log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1390        }
1391
1392        qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK);
1393        qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode);
1394        if(qc1!=qc2) {
1395            log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1396        }
1397
1398        qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK);
1399        qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode);
1400        if(qc1!=qc2) {
1401            log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1402        }
1403
1404        qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK);
1405        qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode);
1406        if(qc1!=qc2) {
1407            log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1408        }
1409
1410        length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, UPRV_LENGTHOF(nfd), &errorCode);
1411        /* length-length == 0 is used to get around a compiler warning. */
1412        U16_GET(nfd, 0, length-length, length, lead);
1413        U16_GET(nfd, 0, length-1, length, trail);
1414
1415        lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS);
1416        lccc2=u_getCombiningClass(lead);
1417        tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
1418        tccc2=u_getCombiningClass(trail);
1419
1420        if(lccc1!=lccc2) {
1421            log_data_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
1422                    lccc1, lccc2, c);
1423        }
1424        if(tccc1!=tccc2) {
1425            log_data_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
1426                    tccc1, tccc2, c);
1427        }
1428
1429        /* skip some code points */
1430        c=(20*c)/19+1;
1431    }
1432}
1433
1434static void
1435TestComposition(void) {
1436    static const struct {
1437        UNormalizationMode mode;
1438        uint32_t options;
1439        UChar input[12];
1440        UChar expect[12];
1441    } cases[]={
1442        /*
1443         * special cases for UAX #15 bug
1444         * see Unicode Corrigendum #5: Normalization Idempotency
1445         * at http://unicode.org/versions/corrigendum5.html
1446         * (was Public Review Issue #29)
1447         */
1448        { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 },         { 0x1100, 0x0300, 0x1161, 0x0327 } },
1449        { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
1450        { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 },         { 0xac00, 0x0327, 0x0300, 0x11a8 } },
1451        { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e },                 { 0x0b47, 0x0300, 0x0b3e } },
1452
1453        /* TODO: add test cases for UNORM_FCC here (j2151) */
1454    };
1455
1456    UChar output[16];
1457    UErrorCode errorCode;
1458    int32_t i, length;
1459
1460    for(i=0; i<UPRV_LENGTHOF(cases); ++i) {
1461        errorCode=U_ZERO_ERROR;
1462        length=unorm_normalize(
1463                    cases[i].input, -1,
1464                    cases[i].mode, cases[i].options,
1465                    output, UPRV_LENGTHOF(output),
1466                    &errorCode);
1467        if( U_FAILURE(errorCode) ||
1468            length!=u_strlen(cases[i].expect) ||
1469            0!=u_memcmp(output, cases[i].expect, length)
1470        ) {
1471            log_data_err("unexpected result for case %d - (Are you missing data?)\n", i);
1472        }
1473    }
1474}
1475
1476static void
1477TestGetDecomposition() {
1478    UChar decomp[32];
1479    int32_t length;
1480
1481    UErrorCode errorCode=U_ZERO_ERROR;
1482    const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, &errorCode);
1483    if(U_FAILURE(errorCode)) {
1484        log_err_status(errorCode, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode));
1485        return;
1486    }
1487
1488    length=unorm2_getDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1489    if(U_FAILURE(errorCode) || length>=0) {
1490        log_err("unorm2_getDecomposition(fcc, space) failed\n");
1491    }
1492    errorCode=U_ZERO_ERROR;
1493    length=unorm2_getDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1494    if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1495        log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n");
1496    }
1497    errorCode=U_ZERO_ERROR;
1498    length=unorm2_getDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1499    if(U_FAILURE(errorCode) || length!=3 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0x11a8 || decomp[3]!=0) {
1500        log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n");
1501    }
1502    errorCode=U_ZERO_ERROR;
1503    length=unorm2_getDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1504    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
1505        log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow failed\n");
1506    }
1507    errorCode=U_ZERO_ERROR;
1508    length=unorm2_getDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1509    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1510        log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n");
1511    }
1512    errorCode=U_ZERO_ERROR;
1513    length=unorm2_getDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1514    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1515        log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n");
1516    }
1517}
1518
1519static void
1520TestGetRawDecomposition() {
1521    UChar decomp[32];
1522    int32_t length;
1523
1524    UErrorCode errorCode=U_ZERO_ERROR;
1525    const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode);
1526    if(U_FAILURE(errorCode)) {
1527        log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1528        return;
1529    }
1530    /*
1531     * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
1532     * without recursive decomposition.
1533     */
1534
1535    length=unorm2_getRawDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1536    if(U_FAILURE(errorCode) || length>=0) {
1537        log_err("unorm2_getDecomposition(nfkc, space) failed\n");
1538    }
1539    errorCode=U_ZERO_ERROR;
1540    length=unorm2_getRawDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1541    if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1542        log_err("unorm2_getDecomposition(nfkc, a-umlaut) failed\n");
1543    }
1544    /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
1545    errorCode=U_ZERO_ERROR;
1546    length=unorm2_getRawDecomposition(n2, 0x1e08, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1547    if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xc7 || decomp[1]!=0x301 || decomp[2]!=0) {
1548        log_err("unorm2_getDecomposition(nfkc, c-cedilla-acute) failed\n");
1549    }
1550    /* U+212B ANGSTROM SIGN */
1551    errorCode=U_ZERO_ERROR;
1552    length=unorm2_getRawDecomposition(n2, 0x212b, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1553    if(U_FAILURE(errorCode) || length!=1 || decomp[0]!=0xc5 || decomp[1]!=0) {
1554        log_err("unorm2_getDecomposition(nfkc, angstrom sign) failed\n");
1555    }
1556    errorCode=U_ZERO_ERROR;
1557    length=unorm2_getRawDecomposition(n2, 0xac00, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1558    if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0) {
1559        log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC00) failed\n");
1560    }
1561    /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */
1562    errorCode=U_ZERO_ERROR;
1563    length=unorm2_getRawDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1564    if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xac00 || decomp[1]!=0x11a8 || decomp[2]!=0) {
1565        log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) failed\n");
1566    }
1567    errorCode=U_ZERO_ERROR;
1568    length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1569    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=2) {
1570        log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) overflow failed\n");
1571    }
1572    errorCode=U_ZERO_ERROR;
1573    length=unorm2_getRawDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1574    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1575        log_err("unorm2_getDecomposition(nfkc, capacity<0) failed\n");
1576    }
1577    errorCode=U_ZERO_ERROR;
1578    length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1579    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1580        log_err("unorm2_getDecomposition(nfkc, decomposition=NULL) failed\n");
1581    }
1582}
1583
1584static void
1585TestAppendRestoreMiddle() {
1586    UChar a[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 };  /* last chars are 'A' and 'cedilla' NFC */
1587    static const UChar b[]={ 0x30A, 0x64, 0x65, 0x66, 0 };  /* first char is 'ring above' NFC */
1588    /* NFC: C5 is 'A with ring above' */
1589    static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 };
1590    int32_t length;
1591    UErrorCode errorCode=U_ZERO_ERROR;
1592    const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1593    if(U_FAILURE(errorCode)) {
1594        log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1595        return;
1596    }
1597    /*
1598     * Use length=-1 to fool the estimate of the ReorderingBuffer capacity.
1599     * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A>
1600     * still fits into a[] but the full result still overflows this capacity.
1601     * (Let it modify the destination buffer before reallocating internally.)
1602     */
1603    length=unorm2_append(n2, a, -1, 6, b, -1, &errorCode);
1604    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=UPRV_LENGTHOF(expected)) {
1605        log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)length);
1606        return;
1607    }
1608    /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */
1609    if(a[0]!=0x61 || a[1]!=0x62 || a[2]!=0x63 || a[3]!=0x41 || a[4]!=0x327 || a[5]!=0) {
1610        log_err("unorm2_append(overflow) modified the first string\n");
1611        return;
1612    }
1613    errorCode=U_ZERO_ERROR;
1614    length=unorm2_append(n2, a, -1, UPRV_LENGTHOF(a), b, -1, &errorCode);
1615    if(U_FAILURE(errorCode) || length!=UPRV_LENGTHOF(expected) || 0!=u_memcmp(a, expected, length)) {
1616        log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(errorCode), (int)length);
1617        return;
1618    }
1619}
1620
1621static void
1622TestGetEasyToUseInstance() {
1623    static const UChar in[]={
1624        0xA0,  /* -> <noBreak> 0020 */
1625        0xC7, 0x301  /* = 1E08 = 0043 0327 0301 */
1626    };
1627    UChar out[32];
1628    int32_t length;
1629
1630    UErrorCode errorCode=U_ZERO_ERROR;
1631    const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1632    if(U_FAILURE(errorCode)) {
1633        log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1634        return;
1635    }
1636    length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1637    if(U_FAILURE(errorCode) || length!=2 || out[0]!=0xa0 || out[1]!=0x1e08) {
1638        log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n",
1639                (int)length, u_errorName(errorCode));
1640    }
1641
1642    errorCode=U_ZERO_ERROR;
1643    n2=unorm2_getNFDInstance(&errorCode);
1644    if(U_FAILURE(errorCode)) {
1645        log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode));
1646        return;
1647    }
1648    length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1649    if(U_FAILURE(errorCode) || length!=4 || out[0]!=0xa0 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1650        log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n",
1651                (int)length, u_errorName(errorCode));
1652    }
1653
1654    errorCode=U_ZERO_ERROR;
1655    n2=unorm2_getNFKCInstance(&errorCode);
1656    if(U_FAILURE(errorCode)) {
1657        log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1658        return;
1659    }
1660    length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1661    if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e08) {
1662        log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n",
1663                (int)length, u_errorName(errorCode));
1664    }
1665
1666    errorCode=U_ZERO_ERROR;
1667    n2=unorm2_getNFKDInstance(&errorCode);
1668    if(U_FAILURE(errorCode)) {
1669        log_err_status(errorCode, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode));
1670        return;
1671    }
1672    length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1673    if(U_FAILURE(errorCode) || length!=4 || out[0]!=0x20 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1674        log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n",
1675                (int)length, u_errorName(errorCode));
1676    }
1677
1678    errorCode=U_ZERO_ERROR;
1679    n2=unorm2_getNFKCCasefoldInstance(&errorCode);
1680    if(U_FAILURE(errorCode)) {
1681        log_err_status(errorCode, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode));
1682        return;
1683    }
1684    length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1685    if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e09) {
1686        log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n",
1687                (int)length, u_errorName(errorCode));
1688    }
1689}
1690
1691#endif /* #if !UCONFIG_NO_NORMALIZATION */
1692