1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/*
4 *******************************************************************************
5 * Copyright (C) 1996-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 */
9
10package com.ibm.icu.dev.test.normalizer;
11
12import java.text.StringCharacterIterator;
13import java.util.Random;
14
15import org.junit.Test;
16
17import com.ibm.icu.dev.test.TestFmwk;
18import com.ibm.icu.impl.Norm2AllModes;
19import com.ibm.icu.impl.Normalizer2Impl;
20import com.ibm.icu.impl.USerializedSet;
21import com.ibm.icu.impl.Utility;
22import com.ibm.icu.lang.UCharacter;
23import com.ibm.icu.lang.UCharacterCategory;
24import com.ibm.icu.lang.UProperty;
25import com.ibm.icu.text.FilteredNormalizer2;
26import com.ibm.icu.text.Normalizer;
27import com.ibm.icu.text.Normalizer2;
28import com.ibm.icu.text.UCharacterIterator;
29import com.ibm.icu.text.UTF16;
30import com.ibm.icu.text.UnicodeSet;
31import com.ibm.icu.text.UnicodeSetIterator;
32
33
34public class BasicTest extends TestFmwk {
35    String[][] canonTests = {
36        // Input                Decomposed              Composed
37        { "cat",                "cat",                  "cat"               },
38        { "\u00e0ardvark",      "a\u0300ardvark",       "\u00e0ardvark",    },
39
40        { "\u1e0a",             "D\u0307",              "\u1e0a"            }, // D-dot_above
41        { "D\u0307",            "D\u0307",              "\u1e0a"            }, // D dot_above
42
43        { "\u1e0c\u0307",       "D\u0323\u0307",        "\u1e0c\u0307"      }, // D-dot_below dot_above
44        { "\u1e0a\u0323",       "D\u0323\u0307",        "\u1e0c\u0307"      }, // D-dot_above dot_below
45        { "D\u0307\u0323",      "D\u0323\u0307",        "\u1e0c\u0307"      }, // D dot_below dot_above
46
47        { "\u1e10\u0307\u0323", "D\u0327\u0323\u0307",  "\u1e10\u0323\u0307"}, // D dot_below cedilla dot_above
48        { "D\u0307\u0328\u0323","D\u0328\u0323\u0307",  "\u1e0c\u0328\u0307"}, // D dot_above ogonek dot_below
49
50        { "\u1E14",             "E\u0304\u0300",        "\u1E14"            }, // E-macron-grave
51        { "\u0112\u0300",       "E\u0304\u0300",        "\u1E14"            }, // E-macron + grave
52        { "\u00c8\u0304",       "E\u0300\u0304",        "\u00c8\u0304"      }, // E-grave + macron
53
54        { "\u212b",             "A\u030a",              "\u00c5"            }, // angstrom_sign
55        { "\u00c5",             "A\u030a",              "\u00c5"            }, // A-ring
56
57        { "\u00c4ffin",         "A\u0308ffin",          "\u00c4ffin"        },
58        { "\u00c4\uFB03n",      "A\u0308\uFB03n",       "\u00c4\uFB03n"     },
59
60        { "\u00fdffin",         "y\u0301ffin",          "\u00fdffin"        }, //updated with 3.0
61        { "\u00fd\uFB03n",      "y\u0301\uFB03n",       "\u00fd\uFB03n"     }, //updated with 3.0
62
63        { "Henry IV",           "Henry IV",             "Henry IV"          },
64        { "Henry \u2163",       "Henry \u2163",         "Henry \u2163"      },
65
66        { "\u30AC",             "\u30AB\u3099",         "\u30AC"            }, // ga (Katakana)
67        { "\u30AB\u3099",       "\u30AB\u3099",         "\u30AC"            }, // ka + ten
68        { "\uFF76\uFF9E",       "\uFF76\uFF9E",         "\uFF76\uFF9E"      }, // hw_ka + hw_ten
69        { "\u30AB\uFF9E",       "\u30AB\uFF9E",         "\u30AB\uFF9E"      }, // ka + hw_ten
70        { "\uFF76\u3099",       "\uFF76\u3099",         "\uFF76\u3099"      }, // hw_ka + ten
71
72        { "A\u0300\u0316", "A\u0316\u0300", "\u00C0\u0316" },
73        {"\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e","\\U0001D157\\U0001D165\\U0001D157\\U0001D165\\U0001D157\\U0001D165", "\\U0001D157\\U0001D165\\U0001D157\\U0001D165\\U0001D157\\U0001D165"},
74    };
75
76    String[][] compatTests = {
77            // Input                Decomposed              Composed
78        { "cat",                 "cat",                     "cat"           },
79        { "\uFB4f",             "\u05D0\u05DC",         "\u05D0\u05DC",     }, // Alef-Lamed vs. Alef, Lamed
80
81        { "\u00C4ffin",         "A\u0308ffin",          "\u00C4ffin"        },
82        { "\u00C4\uFB03n",      "A\u0308ffin",          "\u00C4ffin"        }, // ffi ligature -> f + f + i
83
84        { "\u00fdffin",         "y\u0301ffin",          "\u00fdffin"        },        //updated for 3.0
85        { "\u00fd\uFB03n",      "y\u0301ffin",          "\u00fdffin"        }, // ffi ligature -> f + f + i
86
87        { "Henry IV",           "Henry IV",             "Henry IV"          },
88        { "Henry \u2163",       "Henry IV",             "Henry IV"          },
89
90        { "\u30AC",             "\u30AB\u3099",         "\u30AC"            }, // ga (Katakana)
91        { "\u30AB\u3099",       "\u30AB\u3099",         "\u30AC"            }, // ka + ten
92
93        { "\uFF76\u3099",       "\u30AB\u3099",         "\u30AC"            }, // hw_ka + ten
94
95        /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later*/
96        { "\uFF76\uFF9E",       "\u30AB\u3099",         "\u30AC"            }, // hw_ka + hw_ten
97        { "\u30AB\uFF9E",       "\u30AB\u3099",         "\u30AC"            }, // ka + hw_ten
98
99    };
100
101    // With Canonical decomposition, Hangul syllables should get decomposed
102    // into Jamo, but Jamo characters should not be decomposed into
103    // conjoining Jamo
104    String[][] hangulCanon = {
105        // Input                Decomposed              Composed
106        { "\ud4db",             "\u1111\u1171\u11b6",   "\ud4db"        },
107        { "\u1111\u1171\u11b6", "\u1111\u1171\u11b6",   "\ud4db"        },
108    };
109
110    // With compatibility decomposition turned on,
111    // it should go all the way down to conjoining Jamo characters.
112    // THIS IS NO LONGER TRUE IN UNICODE v2.1.8, SO THIS TEST IS OBSOLETE
113    String[][] hangulCompat = {
114        // Input        Decomposed                          Composed
115        // { "\ud4db",     "\u1111\u116e\u1175\u11af\u11c2",   "\ud478\u1175\u11af\u11c2"  },
116    };
117
118    @Test
119    public void TestHangulCompose()
120                throws Exception{
121        // Make sure that the static composition methods work
122        logln("Canonical composition...");
123        staticTest(Normalizer.NFC, hangulCanon,  2);
124        logln("Compatibility composition...");
125        staticTest(Normalizer.NFKC, hangulCompat, 2);
126        // Now try iterative composition....
127        logln("Iterative composition...");
128        Normalizer norm = new Normalizer("", Normalizer.NFC,0);
129        iterateTest(norm, hangulCanon, 2);
130
131        norm.setMode(Normalizer.NFKD);
132        iterateTest(norm, hangulCompat, 2);
133
134        // And finally, make sure you can do it in reverse too
135        logln("Reverse iteration...");
136        norm.setMode(Normalizer.NFC);
137        backAndForth(norm, hangulCanon);
138     }
139
140    @Test
141    public void TestHangulDecomp() throws Exception{
142        // Make sure that the static decomposition methods work
143        logln("Canonical decomposition...");
144        staticTest(Normalizer.NFD, hangulCanon,  1);
145        logln("Compatibility decomposition...");
146        staticTest(Normalizer.NFKD, hangulCompat, 1);
147
148         // Now the iterative decomposition methods...
149        logln("Iterative decomposition...");
150        Normalizer norm = new Normalizer("", Normalizer.NFD,0);
151        iterateTest(norm, hangulCanon, 1);
152
153        norm.setMode(Normalizer.NFKD);
154        iterateTest(norm, hangulCompat, 1);
155
156        // And finally, make sure you can do it in reverse too
157        logln("Reverse iteration...");
158        norm.setMode(Normalizer.NFD);
159        backAndForth(norm, hangulCanon);
160    }
161    @Test
162    public void TestNone() throws Exception{
163        Normalizer norm = new Normalizer("", Normalizer.NONE,0);
164        iterateTest(norm, canonTests, 0);
165        staticTest(Normalizer.NONE, canonTests, 0);
166    }
167    @Test
168    public void TestDecomp() throws Exception{
169        Normalizer norm = new Normalizer("", Normalizer.NFD,0);
170        iterateTest(norm, canonTests, 1);
171        staticTest(Normalizer.NFD, canonTests, 1);
172        decomposeTest(Normalizer.NFD, canonTests, 1);
173    }
174
175    @Test
176    public void TestCompatDecomp() throws Exception{
177        Normalizer norm = new Normalizer("", Normalizer.NFKD,0);
178        iterateTest(norm, compatTests, 1);
179        staticTest(Normalizer.NFKD,compatTests, 1);
180        decomposeTest(Normalizer.NFKD,compatTests, 1);
181    }
182
183    @Test
184    public void TestCanonCompose() throws Exception{
185        Normalizer norm = new Normalizer("", Normalizer.NFC,0);
186        iterateTest(norm, canonTests, 2);
187        staticTest(Normalizer.NFC, canonTests, 2);
188        composeTest(Normalizer.NFC, canonTests, 2);
189    }
190
191    @Test
192    public void TestCompatCompose() throws Exception{
193        Normalizer norm = new Normalizer("", Normalizer.NFKC,0);
194        iterateTest(norm, compatTests, 2);
195        staticTest(Normalizer.NFKC,compatTests, 2);
196        composeTest(Normalizer.NFKC,compatTests, 2);
197    }
198
199    @Test
200    public void TestExplodingBase() throws Exception{
201        // \u017f - Latin small letter long s
202        // \u0307 - combining dot above
203        // \u1e61 - Latin small letter s with dot above
204        // \u1e9b - Latin small letter long s with dot above
205        String[][] canon = {
206            // Input                Decomposed              Composed
207            { "Tschu\u017f",        "Tschu\u017f",          "Tschu\u017f"    },
208            { "Tschu\u1e9b",        "Tschu\u017f\u0307",    "Tschu\u1e9b"    },
209        };
210        String[][] compat = {
211            // Input                Decomposed              Composed
212            { "\u017f",        "s",              "s"           },
213            { "\u1e9b",        "s\u0307",        "\u1e61"      },
214        };
215
216        staticTest(Normalizer.NFD, canon,  1);
217        staticTest(Normalizer.NFC, canon,  2);
218
219        staticTest(Normalizer.NFKD, compat, 1);
220        staticTest(Normalizer.NFKC, compat, 2);
221
222    }
223
224    /**
225     * The Tibetan vowel sign AA, 0f71, was messed up prior to
226     * Unicode version 2.1.9.
227     * Once 2.1.9 or 3.0 is released, uncomment this test.
228     */
229    @Test
230    public void TestTibetan() throws Exception{
231        String[][] decomp = {
232            { "\u0f77", "\u0f77", "\u0fb2\u0f71\u0f80" }
233        };
234        String[][] compose = {
235            { "\u0fb2\u0f71\u0f80", "\u0fb2\u0f71\u0f80", "\u0fb2\u0f71\u0f80" }
236        };
237
238        staticTest(Normalizer.NFD, decomp, 1);
239        staticTest(Normalizer.NFKD,decomp, 2);
240        staticTest(Normalizer.NFC, compose, 1);
241        staticTest(Normalizer.NFKC,compose, 2);
242    }
243
244    /**
245     * Make sure characters in the CompositionExclusion.txt list do not get
246     * composed to.
247     */
248    @Test
249    public void TestCompositionExclusion()
250                throws Exception{
251        // This list is generated from CompositionExclusion.txt.
252        // Update whenever the normalizer tables are updated.  Note
253        // that we test all characters listed, even those that can be
254        // derived from the Unicode DB and are therefore commented
255        // out.
256        String EXCLUDED =
257            "\u0340\u0341\u0343\u0344\u0374\u037E\u0387\u0958" +
258            "\u0959\u095A\u095B\u095C\u095D\u095E\u095F\u09DC" +
259            "\u09DD\u09DF\u0A33\u0A36\u0A59\u0A5A\u0A5B\u0A5E" +
260            "\u0B5C\u0B5D\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69" +
261            "\u0F73\u0F75\u0F76\u0F78\u0F81\u0F93\u0F9D\u0FA2" +
262            "\u0FA7\u0FAC\u0FB9\u1F71\u1F73\u1F75\u1F77\u1F79" +
263            "\u1F7B\u1F7D\u1FBB\u1FBE\u1FC9\u1FCB\u1FD3\u1FDB" +
264            "\u1FE3\u1FEB\u1FEE\u1FEF\u1FF9\u1FFB\u1FFD\u2000" +
265            "\u2001\u2126\u212A\u212B\u2329\u232A\uF900\uFA10" +
266            "\uFA12\uFA15\uFA20\uFA22\uFA25\uFA26\uFA2A\uFB1F" +
267            "\uFB2A\uFB2B\uFB2C\uFB2D\uFB2E\uFB2F\uFB30\uFB31" +
268            "\uFB32\uFB33\uFB34\uFB35\uFB36\uFB38\uFB39\uFB3A" +
269            "\uFB3B\uFB3C\uFB3E\uFB40\uFB41\uFB43\uFB44\uFB46" +
270            "\uFB47\uFB48\uFB49\uFB4A\uFB4B\uFB4C\uFB4D\uFB4E";
271        for (int i=0; i<EXCLUDED.length(); ++i) {
272            String a = String.valueOf(EXCLUDED.charAt(i));
273            String b = Normalizer.normalize(a, Normalizer.NFKD);
274            String c = Normalizer.normalize(b, Normalizer.NFC);
275            if (c.equals(a)) {
276                errln("FAIL: " + hex(a) + " x DECOMP_COMPAT => " +
277                      hex(b) + " x COMPOSE => " +
278                      hex(c));
279            } else if (isVerbose()) {
280                logln("Ok: " + hex(a) + " x DECOMP_COMPAT => " +
281                      hex(b) + " x COMPOSE => " +
282                      hex(c));
283            }
284        }
285        // The following method works too, but it is somewhat
286        // incestuous.  It uses UInfo, which is the same database that
287        // NormalizerBuilder uses, so if something is wrong with
288        // UInfo, the following test won't show it.  All it will show
289        // is that NormalizerBuilder has been run with whatever the
290        // current UInfo is.
291        //
292        // We comment this out in favor of the test above, which
293        // provides independent verification (but also requires
294        // independent updating).
295//      logln("---");
296//      UInfo uinfo = new UInfo();
297//      for (int i=0; i<=0xFFFF; ++i) {
298//          if (!uinfo.isExcludedComposition((char)i) ||
299//              (!uinfo.hasCanonicalDecomposition((char)i) &&
300//               !uinfo.hasCompatibilityDecomposition((char)i))) continue;
301//          String a = String.valueOf((char)i);
302//          String b = Normalizer.normalize(a,Normalizer.DECOMP_COMPAT,0);
303//          String c = Normalizer.normalize(b,Normalizer.COMPOSE,0);
304//          if (c.equals(a)) {
305//              errln("FAIL: " + hex(a) + " x DECOMP_COMPAT => " +
306//                    hex(b) + " x COMPOSE => " +
307//                    hex(c));
308//          } else if (isVerbose()) {
309//              logln("Ok: " + hex(a) + " x DECOMP_COMPAT => " +
310//                    hex(b) + " x COMPOSE => " +
311//                    hex(c));
312//          }
313//      }
314    }
315
316    /**
317     * Test for a problem that showed up just before ICU 1.6 release
318     * having to do with combining characters with an index of zero.
319     * Such characters do not participate in any canonical
320     * decompositions.  However, having an index of zero means that
321     * they all share one typeMask[] entry, that is, they all have to
322     * map to the same canonical class, which is not the case, in
323     * reality.
324     */
325    @Test
326    public void TestZeroIndex()
327                throws Exception{
328        String[] DATA = {
329            // Expect col1 x COMPOSE_COMPAT => col2
330            // Expect col2 x DECOMP => col3
331            "A\u0316\u0300", "\u00C0\u0316", "A\u0316\u0300",
332            "A\u0300\u0316", "\u00C0\u0316", "A\u0316\u0300",
333            "A\u0327\u0300", "\u00C0\u0327", "A\u0327\u0300",
334            "c\u0321\u0327", "c\u0321\u0327", "c\u0321\u0327",
335            "c\u0327\u0321", "\u00E7\u0321", "c\u0327\u0321",
336        };
337
338        for (int i=0; i<DATA.length; i+=3) {
339            String a = DATA[i];
340            String b = Normalizer.normalize(a, Normalizer.NFKC);
341            String exp = DATA[i+1];
342            if (b.equals(exp)) {
343                logln("Ok: " + hex(a) + " x COMPOSE_COMPAT => " + hex(b));
344            } else {
345                errln("FAIL: " + hex(a) + " x COMPOSE_COMPAT => " + hex(b) +
346                      ", expect " + hex(exp));
347            }
348            a = Normalizer.normalize(b, Normalizer.NFD);
349            exp = DATA[i+2];
350            if (a.equals(exp)) {
351                logln("Ok: " + hex(b) + " x DECOMP => " + hex(a));
352            } else {
353                errln("FAIL: " + hex(b) + " x DECOMP => " + hex(a) +
354                      ", expect " + hex(exp));
355            }
356        }
357    }
358
359    /**
360     * Test for a problem found by Verisign.  Problem is that
361     * characters at the start of a string are not put in canonical
362     * order correctly by compose() if there is no starter.
363     */
364    @Test
365    public void TestVerisign()
366                throws Exception{
367        String[] inputs = {
368            "\u05b8\u05b9\u05b1\u0591\u05c3\u05b0\u05ac\u059f",
369            "\u0592\u05b7\u05bc\u05a5\u05b0\u05c0\u05c4\u05ad"
370        };
371        String[] outputs = {
372            "\u05b1\u05b8\u05b9\u0591\u05c3\u05b0\u05ac\u059f",
373            "\u05b0\u05b7\u05bc\u05a5\u0592\u05c0\u05ad\u05c4"
374        };
375
376        for (int i = 0; i < inputs.length; ++i) {
377            String input = inputs[i];
378            String output = outputs[i];
379            String result = Normalizer.decompose(input, false);
380            if (!result.equals(output)) {
381                errln("FAIL input: " + hex(input));
382                errln(" decompose: " + hex(result));
383                errln("  expected: " + hex(output));
384            }
385            result = Normalizer.compose(input, false);
386            if (!result.equals(output)) {
387                errln("FAIL input: " + hex(input));
388                errln("   compose: " + hex(result));
389                errln("  expected: " + hex(output));
390            }
391        }
392
393    }
394    @Test
395    public void  TestQuickCheckResultNO()
396                 throws Exception{
397        final char CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
398                                0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
399        final char CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
400                                0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
401        final char CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
402                                0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
403        final char CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
404                                0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
405
406
407        final int SIZE = 10;
408
409        int count = 0;
410        for (; count < SIZE; count ++)
411        {
412            if (Normalizer.quickCheck(String.valueOf(CPNFD[count]),
413                    Normalizer.NFD,0) != Normalizer.NO)
414            {
415                errln("ERROR in NFD quick check at U+" +
416                       Integer.toHexString(CPNFD[count]));
417                return;
418            }
419            if (Normalizer.quickCheck(String.valueOf(CPNFC[count]),
420                        Normalizer.NFC,0) !=Normalizer.NO)
421            {
422                errln("ERROR in NFC quick check at U+"+
423                       Integer.toHexString(CPNFC[count]));
424                return;
425            }
426            if (Normalizer.quickCheck(String.valueOf(CPNFKD[count]),
427                                Normalizer.NFKD,0) != Normalizer.NO)
428            {
429                errln("ERROR in NFKD quick check at U+"+
430                       Integer.toHexString(CPNFKD[count]));
431                return;
432            }
433            if (Normalizer.quickCheck(String.valueOf(CPNFKC[count]),
434                                         Normalizer.NFKC,0) !=Normalizer.NO)
435            {
436                errln("ERROR in NFKC quick check at U+"+
437                       Integer.toHexString(CPNFKC[count]));
438                return;
439            }
440            // for improving coverage
441            if (Normalizer.quickCheck(String.valueOf(CPNFKC[count]),
442                                         Normalizer.NFKC) !=Normalizer.NO)
443            {
444                errln("ERROR in NFKC quick check at U+"+
445                       Integer.toHexString(CPNFKC[count]));
446                return;
447            }
448        }
449    }
450
451
452    @Test
453    public void TestQuickCheckResultYES()
454                throws Exception{
455        final char CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
456                                0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
457        final char CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
458                                0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
459        final char CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
460                                0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
461        final char CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
462                                0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
463
464        final int SIZE = 10;
465        int count = 0;
466
467        char cp = 0;
468        while (cp < 0xA0)
469        {
470            if (Normalizer.quickCheck(String.valueOf(cp), Normalizer.NFD,0)
471                                            != Normalizer.YES)
472            {
473                errln("ERROR in NFD quick check at U+"+
474                                                      Integer.toHexString(cp));
475                return;
476            }
477            if (Normalizer.quickCheck(String.valueOf(cp), Normalizer.NFC,0)
478                                             != Normalizer.YES)
479            {
480                errln("ERROR in NFC quick check at U+"+
481                                                      Integer.toHexString(cp));
482                return;
483            }
484            if (Normalizer.quickCheck(String.valueOf(cp), Normalizer.NFKD,0)
485                                             != Normalizer.YES)
486            {
487                errln("ERROR in NFKD quick check at U+" +
488                                                      Integer.toHexString(cp));
489                return;
490            }
491            if (Normalizer.quickCheck(String.valueOf(cp), Normalizer.NFKC,0)
492                                             != Normalizer.YES)
493            {
494                errln("ERROR in NFKC quick check at U+"+
495                                                       Integer.toHexString(cp));
496                return;
497            }
498            // improve the coverage
499            if (Normalizer.quickCheck(String.valueOf(cp), Normalizer.NFKC)
500                                             != Normalizer.YES)
501            {
502                errln("ERROR in NFKC quick check at U+"+
503                                                       Integer.toHexString(cp));
504                return;
505            }
506            cp++;
507        }
508
509        for (; count < SIZE; count ++)
510        {
511            if (Normalizer.quickCheck(String.valueOf(CPNFD[count]),
512                                         Normalizer.NFD,0)!=Normalizer.YES)
513            {
514                errln("ERROR in NFD quick check at U+"+
515                                             Integer.toHexString(CPNFD[count]));
516                return;
517            }
518            if (Normalizer.quickCheck(String.valueOf(CPNFC[count]),
519                                         Normalizer.NFC,0)!=Normalizer.YES)
520            {
521                errln("ERROR in NFC quick check at U+"+
522                                             Integer.toHexString(CPNFC[count]));
523                return;
524            }
525            if (Normalizer.quickCheck(String.valueOf(CPNFKD[count]),
526                                         Normalizer.NFKD,0)!=Normalizer.YES)
527            {
528                errln("ERROR in NFKD quick check at U+"+
529                                    Integer.toHexString(CPNFKD[count]));
530                return;
531            }
532            if (Normalizer.quickCheck(String.valueOf(CPNFKC[count]),
533                                         Normalizer.NFKC,0)!=Normalizer.YES)
534            {
535                errln("ERROR in NFKC quick check at U+"+
536                        Integer.toHexString(CPNFKC[count]));
537                return;
538            }
539            // improve the coverage
540            if (Normalizer.quickCheck(String.valueOf(CPNFKC[count]),
541                                         Normalizer.NFKC)!=Normalizer.YES)
542            {
543                errln("ERROR in NFKC quick check at U+"+
544                        Integer.toHexString(CPNFKC[count]));
545                return;
546            }
547        }
548    }
549    @Test
550    public void TestBengali() throws Exception{
551        String input = "\u09bc\u09be\u09cd\u09be";
552        String output=Normalizer.normalize(input,Normalizer.NFC);
553        if(!input.equals(output)){
554             errln("ERROR in NFC of string");
555        }
556    }
557    @Test
558    public void TestQuickCheckResultMAYBE()
559                throws Exception{
560
561        final char[] CPNFC = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
562                                0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
563        final char[] CPNFKC = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
564                                0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
565
566
567        final int SIZE = 10;
568
569        int count = 0;
570
571        /* NFD and NFKD does not have any MAYBE codepoints */
572        for (; count < SIZE; count ++)
573        {
574            if (Normalizer.quickCheck(String.valueOf(CPNFC[count]),
575                                        Normalizer.NFC,0)!=Normalizer.MAYBE)
576            {
577                errln("ERROR in NFC quick check at U+"+
578                                            Integer.toHexString(CPNFC[count]));
579                return;
580            }
581            if (Normalizer.quickCheck(String.valueOf(CPNFKC[count]),
582                                       Normalizer.NFKC,0)!=Normalizer.MAYBE)
583            {
584                errln("ERROR in NFKC quick check at U+"+
585                                            Integer.toHexString(CPNFKC[count]));
586                return;
587            }
588            if (Normalizer.quickCheck(new char[]{CPNFC[count]},
589                                        Normalizer.NFC,0)!=Normalizer.MAYBE)
590            {
591                errln("ERROR in NFC quick check at U+"+
592                                            Integer.toHexString(CPNFC[count]));
593                return;
594            }
595            if (Normalizer.quickCheck(new char[]{CPNFKC[count]},
596                                       Normalizer.NFKC,0)!=Normalizer.MAYBE)
597            {
598                errln("ERROR in NFKC quick check at U+"+
599                                            Integer.toHexString(CPNFKC[count]));
600                return;
601            }
602            if (Normalizer.quickCheck(new char[]{CPNFKC[count]},
603                                       Normalizer.NONE,0)!=Normalizer.YES)
604            {
605                errln("ERROR in NONE quick check at U+"+
606                                            Integer.toHexString(CPNFKC[count]));
607                return;
608            }
609        }
610    }
611
612    @Test
613    public void TestQuickCheckStringResult()
614                throws Exception{
615        int count;
616        String d;
617        String c;
618
619        for (count = 0; count < canonTests.length; count ++)
620        {
621            d = canonTests[count][1];
622            c = canonTests[count][2];
623            if (Normalizer.quickCheck(d,Normalizer.NFD,0)
624                                            != Normalizer.YES)
625            {
626                errln("ERROR in NFD quick check for string at count " + count);
627                return;
628            }
629
630            if (Normalizer.quickCheck(c, Normalizer.NFC,0)
631                                            == Normalizer.NO)
632            {
633                errln("ERROR in NFC quick check for string at count " + count);
634                return;
635            }
636        }
637
638        for (count = 0; count < compatTests.length; count ++)
639        {
640            d = compatTests[count][1];
641            c = compatTests[count][2];
642            if (Normalizer.quickCheck(d, Normalizer.NFKD,0)
643                                            != Normalizer.YES)
644            {
645                errln("ERROR in NFKD quick check for string at count " + count);
646                return;
647            }
648
649            if (Normalizer.quickCheck(c,  Normalizer.NFKC,0)
650                                            != Normalizer.YES)
651            {
652                errln("ERROR in NFKC quick check for string at count " + count);
653                return;
654            }
655        }
656    }
657
658    static final int qcToInt(Normalizer.QuickCheckResult qc) {
659        if(qc==Normalizer.NO) {
660            return 0;
661        } else if(qc==Normalizer.YES) {
662            return 1;
663        } else /* Normalizer.MAYBE */ {
664            return 2;
665        }
666    }
667
668    @Test
669    public void TestQuickCheckPerCP() {
670        int c, lead, trail;
671        String s, nfd;
672        int lccc1, lccc2, tccc1, tccc2;
673        int qc1, qc2;
674
675        if(
676            UCharacter.getIntPropertyMaxValue(UProperty.NFD_QUICK_CHECK)!=1 || // YES
677            UCharacter.getIntPropertyMaxValue(UProperty.NFKD_QUICK_CHECK)!=1 ||
678            UCharacter.getIntPropertyMaxValue(UProperty.NFC_QUICK_CHECK)!=2 || // MAYBE
679            UCharacter.getIntPropertyMaxValue(UProperty.NFKC_QUICK_CHECK)!=2 ||
680            UCharacter.getIntPropertyMaxValue(UProperty.LEAD_CANONICAL_COMBINING_CLASS)!=UCharacter.getIntPropertyMaxValue(UProperty.CANONICAL_COMBINING_CLASS) ||
681            UCharacter.getIntPropertyMaxValue(UProperty.TRAIL_CANONICAL_COMBINING_CLASS)!=UCharacter.getIntPropertyMaxValue(UProperty.CANONICAL_COMBINING_CLASS)
682        ) {
683            errln("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS");
684        }
685
686        /*
687         * compare the quick check property values for some code points
688         * to the quick check results for checking same-code point strings
689         */
690        c=0;
691        while(c<0x110000) {
692            s=UTF16.valueOf(c);
693
694            qc1=UCharacter.getIntPropertyValue(c, UProperty.NFC_QUICK_CHECK);
695            qc2=qcToInt(Normalizer.quickCheck(s, Normalizer.NFC));
696            if(qc1!=qc2) {
697                errln("getIntPropertyValue(NFC)="+qc1+" != "+qc2+"=quickCheck(NFC) for U+"+Integer.toHexString(c));
698            }
699
700            qc1=UCharacter.getIntPropertyValue(c, UProperty.NFD_QUICK_CHECK);
701            qc2=qcToInt(Normalizer.quickCheck(s, Normalizer.NFD));
702            if(qc1!=qc2) {
703                errln("getIntPropertyValue(NFD)="+qc1+" != "+qc2+"=quickCheck(NFD) for U+"+Integer.toHexString(c));
704            }
705
706            qc1=UCharacter.getIntPropertyValue(c, UProperty.NFKC_QUICK_CHECK);
707            qc2=qcToInt(Normalizer.quickCheck(s, Normalizer.NFKC));
708            if(qc1!=qc2) {
709                errln("getIntPropertyValue(NFKC)="+qc1+" != "+qc2+"=quickCheck(NFKC) for U+"+Integer.toHexString(c));
710            }
711
712            qc1=UCharacter.getIntPropertyValue(c, UProperty.NFKD_QUICK_CHECK);
713            qc2=qcToInt(Normalizer.quickCheck(s, Normalizer.NFKD));
714            if(qc1!=qc2) {
715                errln("getIntPropertyValue(NFKD)="+qc1+" != "+qc2+"=quickCheck(NFKD) for U+"+Integer.toHexString(c));
716            }
717
718            nfd=Normalizer.normalize(s, Normalizer.NFD);
719            lead=UTF16.charAt(nfd, 0);
720            trail=UTF16.charAt(nfd, nfd.length()-1);
721
722            lccc1=UCharacter.getIntPropertyValue(c, UProperty.LEAD_CANONICAL_COMBINING_CLASS);
723            lccc2=UCharacter.getCombiningClass(lead);
724            tccc1=UCharacter.getIntPropertyValue(c, UProperty.TRAIL_CANONICAL_COMBINING_CLASS);
725            tccc2=UCharacter.getCombiningClass(trail);
726
727            if(lccc1!=lccc2) {
728                errln("getIntPropertyValue(lccc)="+lccc1+" != "+lccc2+"=getCombiningClass(lead) for U+"+Integer.toHexString(c));
729            }
730            if(tccc1!=tccc2) {
731                errln("getIntPropertyValue(tccc)="+tccc1+" != "+tccc2+"=getCombiningClass(trail) for U+"+Integer.toHexString(c));
732            }
733
734            /* skip some code points */
735            c=(20*c)/19+1;
736        }
737    }
738
    //------------------------------------------------------------------------
    // Internal utilities
    //------------------------------------------------------------------------
745
746/*    private void backAndForth(Normalizer iter, String input)
747    {
748        iter.setText(input);
749
750        // Run through the iterator forwards and stick it into a StringBuffer
751        StringBuffer forward =  new StringBuffer();
752        for (int ch = iter.first(); ch != Normalizer.DONE; ch = iter.next()) {
753            forward.append(ch);
754        }
755
756        // Now do it backwards
757        StringBuffer reverse = new StringBuffer();
758        for (int ch = iter.last(); ch != Normalizer.DONE; ch = iter.previous()) {
759            reverse.insert(0, ch);
760        }
761
762        if (!forward.toString().equals(reverse.toString())) {
763            errln("FAIL: Forward/reverse mismatch for input " + hex(input)
764                  + ", forward: " + hex(forward) + ", backward: "+hex(reverse));
765        } else if (isVerbose()) {
766            logln("Ok: Forward/reverse for input " + hex(input)
767                  + ", forward: " + hex(forward) + ", backward: "+hex(reverse));
768        }
769    }*/
770
771    private void backAndForth(Normalizer iter, String[][] tests)
772    {
773        for (int i = 0; i < tests.length; i++)
774        {
775            iter.setText(tests[i][0]);
776
777            // Run through the iterator forwards and stick it into a
778            // StringBuffer
779            StringBuffer forward =  new StringBuffer();
780            for (int ch = iter.first(); ch != Normalizer.DONE; ch = iter.next()) {
781                forward.append(ch);
782            }
783
784            // Now do it backwards
785            StringBuffer reverse = new StringBuffer();
786            for (int ch = iter.last(); ch != Normalizer.DONE; ch = iter.previous()) {
787                reverse.insert(0, ch);
788            }
789
790            if (!forward.toString().equals(reverse.toString())) {
791                errln("FAIL: Forward/reverse mismatch for input "
792                    + hex(tests[i][0]) + ", forward: " + hex(forward)
793                    + ", backward: " + hex(reverse));
794            } else if (isVerbose()) {
795                logln("Ok: Forward/reverse for input " + hex(tests[i][0])
796                      + ", forward: " + hex(forward) + ", backward: "
797                      + hex(reverse));
798            }
799        }
800    }
801
802    private void staticTest (Normalizer.Mode mode,
803                             String[][] tests, int outCol) throws Exception{
804        for (int i = 0; i < tests.length; i++)
805        {
806            String input = Utility.unescape(tests[i][0]);
807            String expect = Utility.unescape(tests[i][outCol]);
808
809            logln("Normalizing '" + input + "' (" + hex(input) + ")" );
810
811            String output = Normalizer.normalize(input, mode);
812
813            if (!output.equals(expect)) {
814                errln("FAIL: case " + i
815                    + " expected '" + expect + "' (" + hex(expect) + ")"
816                    + " but got '" + output + "' (" + hex(output) + ")" );
817            }
818        }
819        char[] output = new char[1];
820        for (int i = 0; i < tests.length; i++)
821        {
822            char[] input = Utility.unescape(tests[i][0]).toCharArray();
823            String expect =Utility.unescape( tests[i][outCol]);
824
825            logln("Normalizing '" + new String(input) + "' (" +
826                        hex(new String(input)) + ")" );
827            int reqLength=0;
828            while(true){
829                try{
830                    reqLength=Normalizer.normalize(input,output, mode,0);
831                    if(reqLength<=output.length    ){
832                        break;
833                    }
834                }catch(IndexOutOfBoundsException e){
835                    output= new char[Integer.parseInt(e.getMessage())];
836                    continue;
837                }
838            }
839            if (!expect.equals(new String(output,0,reqLength))) {
840                errln("FAIL: case " + i
841                    + " expected '" + expect + "' (" + hex(expect) + ")"
842                    + " but got '" + new String(output)
843                    + "' ("  + hex(new String(output)) + ")" );
844            }
845        }
846    }
847    private void decomposeTest(Normalizer.Mode mode,
848                             String[][] tests, int outCol) throws Exception{
849        for (int i = 0; i < tests.length; i++)
850        {
851            String input = Utility.unescape(tests[i][0]);
852            String expect = Utility.unescape(tests[i][outCol]);
853
854            logln("Normalizing '" + input + "' (" + hex(input) + ")" );
855
856            String output = Normalizer.decompose(input, mode==Normalizer.NFKD);
857
858            if (!output.equals(expect)) {
859                errln("FAIL: case " + i
860                    + " expected '" + expect + "' (" + hex(expect) + ")"
861                    + " but got '" + output + "' (" + hex(output) + ")" );
862            }
863        }
864        char[] output = new char[1];
865        for (int i = 0; i < tests.length; i++)
866        {
867            char[] input = Utility.unescape(tests[i][0]).toCharArray();
868            String expect = Utility.unescape(tests[i][outCol]);
869
870            logln("Normalizing '" + new String(input) + "' (" +
871                        hex(new String(input)) + ")" );
872            int reqLength=0;
873            while(true){
874                try{
875                    reqLength=Normalizer.decompose(input,output, mode==Normalizer.NFKD,0);
876                    if(reqLength<=output.length ){
877                        break;
878                    }
879                }catch(IndexOutOfBoundsException e){
880                    output= new char[Integer.parseInt(e.getMessage())];
881                    continue;
882                }
883            }
884            if (!expect.equals(new String(output,0,reqLength))) {
885                errln("FAIL: case " + i
886                    + " expected '" + expect + "' (" + hex(expect) + ")"
887                    + " but got '" + new String(output)
888                    + "' ("  + hex(new String(output)) + ")" );
889            }
890        }
891        output = new char[1];
892        for (int i = 0; i < tests.length; i++)
893        {
894           char[] input = Utility.unescape(tests[i][0]).toCharArray();
895           String expect = Utility.unescape(tests[i][outCol]);
896
897           logln("Normalizing '" + new String(input) + "' (" +
898                       hex(new String(input)) + ")" );
899           int reqLength=0;
900           while(true){
901               try{
902                   reqLength=Normalizer.decompose(input,0,input.length,output,0,output.length, mode==Normalizer.NFKD,0);
903                   if(reqLength<=output.length ){
904                       break;
905                   }
906               }catch(IndexOutOfBoundsException e){
907                   output= new char[Integer.parseInt(e.getMessage())];
908                   continue;
909               }
910           }
911           if (!expect.equals(new String(output,0,reqLength))) {
912               errln("FAIL: case " + i
913                   + " expected '" + expect + "' (" + hex(expect) + ")"
914                   + " but got '" + new String(output)
915                   + "' ("  + hex(new String(output)) + ")" );
916           }
917           char[] output2 = new char[reqLength * 2];
918           System.arraycopy(output, 0, output2, 0, reqLength);
919           int retLength = Normalizer.decompose(input,0,input.length, output2, reqLength, output2.length, mode==Normalizer.NFKC,0);
920           if(retLength != reqLength){
921               logln("FAIL: Normalizer.compose did not return the expected length. Expected: " +reqLength + " Got: " + retLength);
922           }
923        }
924    }
925
926    private void composeTest(Normalizer.Mode mode,
927                             String[][] tests, int outCol) throws Exception{
928        for (int i = 0; i < tests.length; i++)
929        {
930            String input = Utility.unescape(tests[i][0]);
931            String expect = Utility.unescape(tests[i][outCol]);
932
933            logln("Normalizing '" + input + "' (" + hex(input) + ")" );
934
935            String output = Normalizer.compose(input, mode==Normalizer.NFKC);
936
937            if (!output.equals(expect)) {
938                errln("FAIL: case " + i
939                    + " expected '" + expect + "' (" + hex(expect) + ")"
940                    + " but got '" + output + "' (" + hex(output) + ")" );
941            }
942        }
943        char[] output = new char[1];
944        for (int i = 0; i < tests.length; i++)
945        {
946            char[] input = Utility.unescape(tests[i][0]).toCharArray();
947            String expect = Utility.unescape(tests[i][outCol]);
948
949            logln("Normalizing '" + new String(input) + "' (" +
950                        hex(new String(input)) + ")" );
951            int reqLength=0;
952            while(true){
953                try{
954                    reqLength=Normalizer.compose(input,output, mode==Normalizer.NFKC,0);
955                    if(reqLength<=output.length ){
956                        break;
957                    }
958                }catch(IndexOutOfBoundsException e){
959                    output= new char[Integer.parseInt(e.getMessage())];
960                    continue;
961                }
962            }
963            if (!expect.equals(new String(output,0,reqLength))) {
964                errln("FAIL: case " + i
965                    + " expected '" + expect + "' (" + hex(expect) + ")"
966                    + " but got '" + new String(output)
967                    + "' ("  + hex(new String(output)) + ")" );
968            }
969        }
970        output = new char[1];
971        for (int i = 0; i < tests.length; i++)
972        {
973            char[] input = Utility.unescape(tests[i][0]).toCharArray();
974            String expect = Utility.unescape(tests[i][outCol]);
975
976            logln("Normalizing '" + new String(input) + "' (" +
977                        hex(new String(input)) + ")" );
978            int reqLength=0;
979            while(true){
980                try{
981                    reqLength=Normalizer.compose(input,0,input.length, output, 0, output.length, mode==Normalizer.NFKC,0);
982                    if(reqLength<=output.length ){
983                        break;
984                    }
985                }catch(IndexOutOfBoundsException e){
986                    output= new char[Integer.parseInt(e.getMessage())];
987                    continue;
988                }
989            }
990            if (!expect.equals(new String(output,0,reqLength))) {
991                errln("FAIL: case " + i
992                    + " expected '" + expect + "' (" + hex(expect) + ")"
993                    + " but got '" + new String(output)
994                    + "' ("  + hex(new String(output)) + ")" );
995            }
996
997            char[] output2 = new char[reqLength * 2];
998            System.arraycopy(output, 0, output2, 0, reqLength);
999            int retLength = Normalizer.compose(input,0,input.length, output2, reqLength, output2.length, mode==Normalizer.NFKC,0);
1000            if(retLength != reqLength){
1001                logln("FAIL: Normalizer.compose did not return the expected length. Expected: " +reqLength + " Got: " + retLength);
1002            }
1003        }
1004    }
1005    private void iterateTest(Normalizer iter, String[][] tests, int outCol){
1006        for (int i = 0; i < tests.length; i++)
1007        {
1008            String input = Utility.unescape(tests[i][0]);
1009            String expect = Utility.unescape(tests[i][outCol]);
1010
1011            logln("Normalizing '" + input + "' (" + hex(input) + ")" );
1012
1013            iter.setText(input);
1014            assertEqual(expect, iter, "case " + i + " ");
1015        }
1016    }
1017
1018    private void assertEqual(String expected, Normalizer iter, String msg)
1019    {
1020        int index = 0;
1021        int ch;
1022        UCharacterIterator cIter =  UCharacterIterator.getInstance(expected);
1023
1024        while ((ch=iter.next())!= Normalizer.DONE){
1025            if (index >= expected.length()) {
1026                errln("FAIL: " + msg + "Unexpected character '" + (char)ch
1027                        + "' (" + hex(ch) + ")"
1028                        + " at index " + index);
1029                break;
1030            }
1031            int want = UTF16.charAt(expected,index);
1032            if (ch != want) {
1033                errln("FAIL: " + msg + "got '" + (char)ch
1034                        + "' (" + hex(ch) + ")"
1035                        + " but expected '" + want + "' (" + hex(want)+ ")"
1036                        + " at index " + index);
1037            }
1038            index+=  UTF16.getCharCount(ch);
1039        }
1040        if (index < expected.length()) {
1041            errln("FAIL: " + msg + "Only got " + index + " chars, expected "
1042            + expected.length());
1043        }
1044
1045        cIter.setToLimit();
1046        while((ch=iter.previous())!=Normalizer.DONE){
1047            int want = cIter.previousCodePoint();
1048            if (ch != want ) {
1049                errln("FAIL: " + msg + "got '" + (char)ch
1050                        + "' (" + hex(ch) + ")"
1051                        + " but expected '" + want + "' (" + hex(want) + ")"
1052                        + " at index " + index);
1053            }
1054        }
1055    }
1056    //--------------------------------------------------------------------------
1057
1058    // NOTE: These tests are used for quick debugging so are not ported
1059    // to ICU4C tsnorm.cpp in intltest
1060    //
1061
1062    @Test
1063    public void TestDebugStatic(){
1064        String in = Utility.unescape("\\U0001D157\\U0001D165");
1065        if(!Normalizer.isNormalized(in,Normalizer.NFC,0)){
1066            errln("isNormalized failed");
1067        }
1068
1069        String input  =  "\uAD8B\uAD8B\uAD8B\uAD8B"+
1070            "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
1071            "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
1072            "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
1073            "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
1074            "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
1075            "aaaaaaaaaaaaaaaaaazzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"+
1076            "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"+
1077            "ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"+
1078            "ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd"+
1079            "\uAD8B\uAD8B\uAD8B\uAD8B"+
1080            "d\u031B\u0307\u0323";
1081        String expect = "\u1100\u116F\u11AA\u1100\u116F\u11AA\u1100\u116F"+
1082                        "\u11AA\u1100\u116F\u11AA\uD834\uDD57\uD834\uDD65"+
1083                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1084                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1085                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1086                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1087                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1088                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1089                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1090                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1091                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1092                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1093                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1094                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1095                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1096                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1097                        "\uD834\uDD57\uD834\uDD65aaaaaaaaaaaaaaaaaazzzzzz"+
1098                        "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"+
1099                        "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"+
1100                        "bbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccc"+
1101                        "cccccccccccccccccccccccccccccccccccccccccccccccc"+
1102                        "ddddddddddddddddddddddddddddddddddddddddddddddddddddd"+
1103                        "dddddddddddddddddddddddd"+
1104                        "\u1100\u116F\u11AA\u1100\u116F\u11AA\u1100\u116F"+
1105                        "\u11AA\u1100\u116F\u11AA\u0064\u031B\u0323\u0307";
1106            String output = Normalizer.normalize(Utility.unescape(input),
1107                            Normalizer.NFD);
1108            if(!expect.equals(output)){
1109                errln("FAIL expected: "+hex(expect) + " got: "+hex(output));
1110            }
1111
1112
1113
1114    }
1115    @Test
1116    public void TestDebugIter(){
1117        String src = Utility.unescape("\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e");
1118        String expected = Utility.unescape("\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e");
1119        Normalizer iter = new Normalizer(new StringCharacterIterator(Utility.unescape(src)),
1120                                                Normalizer.NONE,0);
1121        int index = 0;
1122        int ch;
1123        UCharacterIterator cIter =  UCharacterIterator.getInstance(expected);
1124
1125        while ((ch=iter.next())!= Normalizer.DONE){
1126            if (index >= expected.length()) {
1127                errln("FAIL: " +  "Unexpected character '" + (char)ch
1128                        + "' (" + hex(ch) + ")"
1129                        + " at index " + index);
1130                break;
1131            }
1132            int want = UTF16.charAt(expected,index);
1133            if (ch != want) {
1134                errln("FAIL: " +  "got '" + (char)ch
1135                        + "' (" + hex(ch) + ")"
1136                        + " but expected '" + want + "' (" + hex(want)+ ")"
1137                        + " at index " + index);
1138            }
1139            index+=  UTF16.getCharCount(ch);
1140        }
1141        if (index < expected.length()) {
1142            errln("FAIL: " +  "Only got " + index + " chars, expected "
1143            + expected.length());
1144        }
1145
1146        cIter.setToLimit();
1147        while((ch=iter.previous())!=Normalizer.DONE){
1148            int want = cIter.previousCodePoint();
1149            if (ch != want ) {
1150                errln("FAIL: " + "got '" + (char)ch
1151                        + "' (" + hex(ch) + ")"
1152                        + " but expected '" + want + "' (" + hex(want) + ")"
1153                        + " at index " + index);
1154            }
1155        }
1156    }
1157    @Test
1158    public void TestDebugIterOld(){
1159        String input = "\\U0001D15E";
1160        String expected = "\uD834\uDD57\uD834\uDD65";
1161        String expectedReverse = "\uD834\uDD65\uD834\uDD57";
1162        int index = 0;
1163        int ch;
1164        Normalizer iter = new Normalizer(new StringCharacterIterator(Utility.unescape(input)),
1165                                                Normalizer.NFKC,0);
1166        StringBuffer got = new StringBuffer();
1167        for (ch = iter.first();ch!=Normalizer.DONE;ch=iter.next())
1168        {
1169            if (index >= expected.length()) {
1170                errln("FAIL: " +  "Unexpected character '" + (char)ch +
1171                       "' (" + hex(ch) + ")" + " at index " + index);
1172                break;
1173            }
1174            got.append(UCharacter.toString(ch));
1175            index++;
1176        }
1177        if (!expected.equals(got.toString())) {
1178                errln("FAIL: " +  "got '" +got+ "' (" + hex(got) + ")"
1179                        + " but expected '" + expected + "' ("
1180                        + hex(expected) + ")");
1181        }
1182        if (got.length() < expected.length()) {
1183            errln("FAIL: " +  "Only got " + index + " chars, expected "
1184                           + expected.length());
1185        }
1186
1187        logln("Reverse Iteration\n");
1188        iter.setIndexOnly(iter.endIndex());
1189        got.setLength(0);
1190        for(ch=iter.previous();ch!=Normalizer.DONE;ch=iter.previous()){
1191            if (index >= expected.length()) {
1192                errln("FAIL: " +  "Unexpected character '" + (char)ch
1193                               + "' (" + hex(ch) + ")" + " at index " + index);
1194                break;
1195            }
1196            got.append(UCharacter.toString(ch));
1197        }
1198        if (!expectedReverse.equals(got.toString())) {
1199                errln("FAIL: " +  "got '" +got+ "' (" + hex(got) + ")"
1200                               + " but expected '" + expected
1201                               + "' (" + hex(expected) + ")");
1202        }
1203        if (got.length() < expected.length()) {
1204            errln("FAIL: " +  "Only got " + index + " chars, expected "
1205                      + expected.length());
1206        }
1207
1208    }
1209    //--------------------------------------------------------------------------
1210    // helper class for TestPreviousNext()
1211    // simple UTF-32 character iterator
1212    class UCharIterator {
1213
1214       public UCharIterator(int[] src, int len, int index){
1215
1216            s=src;
1217            length=len;
1218            i=index;
1219       }
1220
1221        public int current() {
1222            if(i<length) {
1223                return s[i];
1224            } else {
1225                return -1;
1226            }
1227        }
1228
1229        public int next() {
1230            if(i<length) {
1231                return s[i++];
1232            } else {
1233                return -1;
1234            }
1235        }
1236
1237        public int previous() {
1238            if(i>0) {
1239                return s[--i];
1240            } else {
1241                return -1;
1242            }
1243        }
1244
1245        public int getIndex() {
1246            return i;
1247        }
1248
1249        private int[] s;
1250        private int length, i;
1251    }
1252    @Test
1253    public void TestPreviousNext() {
1254        // src and expect strings
1255        char src[]={
1256            UTF16.getLeadSurrogate(0x2f999), UTF16.getTrailSurrogate(0x2f999),
1257            UTF16.getLeadSurrogate(0x1d15f), UTF16.getTrailSurrogate(0x1d15f),
1258            0xc4,
1259            0x1ed0
1260        };
1261        int expect[]={
1262            0x831d,
1263            0x1d158, 0x1d165,
1264            0x41, 0x308,
1265            0x4f, 0x302, 0x301
1266        };
1267
1268        // expected src indexes corresponding to expect indexes
1269        int expectIndex[]={
1270            0,
1271            2, 2,
1272            4, 4,
1273            5, 5, 5,
1274            6 // behind last character
1275        };
1276
1277        // initial indexes into the src and expect strings
1278
1279        final int SRC_MIDDLE=4;
1280        final int EXPECT_MIDDLE=3;
1281
1282
1283        // movement vector
1284        // - for previous(), 0 for current(), + for next()
1285        // not const so that we can terminate it below for the error message
1286        String moves="0+0+0--0-0-+++0--+++++++0--------";
1287
1288        // iterators
1289        Normalizer iter = new Normalizer(new String(src),
1290                                                Normalizer.NFD,0);
1291        UCharIterator iter32 = new UCharIterator(expect, expect.length,
1292                                                     EXPECT_MIDDLE);
1293
1294        int c1, c2;
1295        char m;
1296
1297        // initially set the indexes into the middle of the strings
1298        iter.setIndexOnly(SRC_MIDDLE);
1299
1300        // move around and compare the iteration code points with
1301        // the expected ones
1302        int movesIndex =0;
1303        while(movesIndex<moves.length()) {
1304            m=moves.charAt(movesIndex++);
1305            if(m=='-') {
1306                c1=iter.previous();
1307                c2=iter32.previous();
1308            } else if(m=='0') {
1309                c1=iter.current();
1310                c2=iter32.current();
1311            } else /* m=='+' */ {
1312                c1=iter.next();
1313                c2=iter32.next();
1314            }
1315
1316            // compare results
1317            if(c1!=c2) {
1318                // copy the moves until the current (m) move, and terminate
1319                String history = moves.substring(0,movesIndex);
1320                errln("error: mismatch in Normalizer iteration at "+history+": "
1321                      +"got c1= " + hex(c1) +" != expected c2= "+ hex(c2));
1322                break;
1323            }
1324
1325            // compare indexes
1326            if(iter.getIndex()!=expectIndex[iter32.getIndex()]) {
1327                // copy the moves until the current (m) move, and terminate
1328                String history = moves.substring(0,movesIndex);
1329                errln("error: index mismatch in Normalizer iteration at "
1330                      +history+ " : "+ "Normalizer index " +iter.getIndex()
1331                      +" expected "+ expectIndex[iter32.getIndex()]);
1332                break;
1333            }
1334        }
1335    }
1336    // Only in ICU4j
1337    @Test
1338    public void TestPreviousNextJCI() {
1339        // src and expect strings
1340        char src[]={
1341            UTF16.getLeadSurrogate(0x2f999), UTF16.getTrailSurrogate(0x2f999),
1342            UTF16.getLeadSurrogate(0x1d15f), UTF16.getTrailSurrogate(0x1d15f),
1343            0xc4,
1344            0x1ed0
1345        };
1346        int expect[]={
1347            0x831d,
1348            0x1d158, 0x1d165,
1349            0x41, 0x308,
1350            0x4f, 0x302, 0x301
1351        };
1352
1353        // expected src indexes corresponding to expect indexes
1354        int expectIndex[]={
1355            0,
1356            2, 2,
1357            4, 4,
1358            5, 5, 5,
1359            6 // behind last character
1360        };
1361
1362        // initial indexes into the src and expect strings
1363
1364        final int SRC_MIDDLE=4;
1365        final int EXPECT_MIDDLE=3;
1366
1367
1368        // movement vector
1369        // - for previous(), 0 for current(), + for next()
1370        // not const so that we can terminate it below for the error message
1371        String moves="0+0+0--0-0-+++0--+++++++0--------";
1372
1373        // iterators
1374        StringCharacterIterator text = new StringCharacterIterator(new String(src));
1375        Normalizer iter = new Normalizer(text,Normalizer.NFD,0);
1376        UCharIterator iter32 = new UCharIterator(expect, expect.length,
1377                                                     EXPECT_MIDDLE);
1378
1379        int c1, c2;
1380        char m;
1381
1382        // initially set the indexes into the middle of the strings
1383        iter.setIndexOnly(SRC_MIDDLE);
1384
1385        // move around and compare the iteration code points with
1386        // the expected ones
1387        int movesIndex =0;
1388        while(movesIndex<moves.length()) {
1389            m=moves.charAt(movesIndex++);
1390            if(m=='-') {
1391                c1=iter.previous();
1392                c2=iter32.previous();
1393            } else if(m=='0') {
1394                c1=iter.current();
1395                c2=iter32.current();
1396            } else /* m=='+' */ {
1397                c1=iter.next();
1398                c2=iter32.next();
1399            }
1400
1401            // compare results
1402            if(c1!=c2) {
1403                // copy the moves until the current (m) move, and terminate
1404                String history = moves.substring(0,movesIndex);
1405                errln("error: mismatch in Normalizer iteration at "+history+": "
1406                      +"got c1= " + hex(c1) +" != expected c2= "+ hex(c2));
1407                break;
1408            }
1409
1410            // compare indexes
1411            if(iter.getIndex()!=expectIndex[iter32.getIndex()]) {
1412                // copy the moves until the current (m) move, and terminate
1413                String history = moves.substring(0,movesIndex);
1414                errln("error: index mismatch in Normalizer iteration at "
1415                      +history+ " : "+ "Normalizer index " +iter.getIndex()
1416                      +" expected "+ expectIndex[iter32.getIndex()]);
1417                break;
1418            }
1419        }
1420    }
1421
1422    // test APIs that are not otherwise used - improve test coverage
1423    @Test
1424    public void TestNormalizerAPI() throws Exception {
1425        try{
1426            // instantiate a Normalizer from a CharacterIterator
1427            String s=Utility.unescape("a\u0308\uac00\\U0002f800");
1428            // make s a bit longer and more interesting
1429            UCharacterIterator iter = UCharacterIterator.getInstance(s+s);
1430            Normalizer norm = new Normalizer(iter, Normalizer.NFC,0);
1431            if(norm.next()!=0xe4) {
1432                errln("error in Normalizer(CharacterIterator).next()");
1433            }
1434
1435            // test clone(), ==, and hashCode()
1436            Normalizer clone=(Normalizer)norm.clone();
1437            if(clone.equals(norm)) {
1438                errln("error in Normalizer(Normalizer(CharacterIterator)).clone()!=norm");
1439            }
1440
1441            if(clone.getLength()!= norm.getLength()){
1442               errln("error in Normalizer.getBeginIndex()");
1443            }
1444            // clone must have the same hashCode()
1445            //if(clone.hashCode()!=norm.hashCode()) {
1446            //    errln("error in Normalizer(Normalizer(CharacterIterator)).clone().hashCode()!=copy.hashCode()");
1447            //}
1448            if(clone.next()!=0xac00) {
1449                errln("error in Normalizer(Normalizer(CharacterIterator)).next()");
1450            }
1451            int ch = clone.next();
1452            if(ch!=0x4e3d) {
1453                errln("error in Normalizer(Normalizer(CharacterIterator)).clone().next()");
1454            }
1455            // position changed, must change hashCode()
1456            if(clone.hashCode()==norm.hashCode()) {
1457                errln("error in Normalizer(Normalizer(CharacterIterator)).clone().next().hashCode()==copy.hashCode()");
1458            }
1459
1460            // test compose() and decompose()
1461            StringBuffer tel;
1462            String nfkc, nfkd;
1463            tel=new StringBuffer("\u2121\u2121\u2121\u2121\u2121\u2121\u2121\u2121\u2121\u2121");
1464            tel.insert(1,(char)0x0301);
1465
1466            nfkc=Normalizer.compose(tel.toString(), true);
1467            nfkd=Normalizer.decompose(tel.toString(), true);
1468            if(
1469                !nfkc.equals(Utility.unescape("TE\u0139TELTELTELTELTELTELTELTELTEL"))||
1470                !nfkd.equals(Utility.unescape("TEL\u0301TELTELTELTELTELTELTELTELTEL"))
1471            ) {
1472                errln("error in Normalizer::(de)compose(): wrong result(s)");
1473            }
1474
1475            // test setIndex()
1476            ch=norm.setIndex(3);
1477            if(ch!=0x4e3d) {
1478               errln("error in Normalizer(CharacterIterator).setIndex(3)");
1479            }
1480
1481            // test setText(CharacterIterator) and getText()
1482            String out, out2;
1483            clone.setText(iter);
1484
1485            out = clone.getText();
1486            out2 = iter.getText();
1487            if( !out.equals(out2) ||
1488                clone.startIndex()!=0||
1489                clone.endIndex()!=iter.getLength()
1490            ) {
1491                errln("error in Normalizer::setText() or Normalizer::getText()");
1492            }
1493
1494            char[] fillIn1 = new char[clone.getLength()];
1495            char[] fillIn2 = new char[iter.getLength()];
1496            int len = clone.getText(fillIn1);
1497            iter.getText(fillIn2,0);
1498            if(!Utility.arrayRegionMatches(fillIn1,0,fillIn2,0,len)){
1499                errln("error in Normalizer.getText(). Normalizer: "+
1500                                Utility.hex(new String(fillIn1))+
1501                                " Iter: " + Utility.hex(new String(fillIn2)));
1502            }
1503
1504            clone.setText(fillIn1);
1505            len = clone.getText(fillIn2);
1506            if(!Utility.arrayRegionMatches(fillIn1,0,fillIn2,0,len)){
1507                errln("error in Normalizer.setText() or Normalizer.getText()"+
1508                                Utility.hex(new String(fillIn1))+
1509                                " Iter: " + Utility.hex(new String(fillIn2)));
1510            }
1511
1512            // test setText(UChar *), getUMode() and setMode()
1513            clone.setText(s);
1514            clone.setIndexOnly(1);
1515            clone.setMode(Normalizer.NFD);
1516            if(clone.getMode()!=Normalizer.NFD) {
1517                errln("error in Normalizer::setMode() or Normalizer::getMode()");
1518            }
1519            if(clone.next()!=0x308 || clone.next()!=0x1100) {
1520                errln("error in Normalizer::setText() or Normalizer::setMode()");
1521            }
1522
1523            // test last()/previous() with an internal buffer overflow
1524            StringBuffer buf = new StringBuffer("aaaaaaaaaa");
1525            buf.setCharAt(10-1,'\u0308');
1526            clone.setText(buf);
1527            if(clone.last()!=0x308) {
1528                errln("error in Normalizer(10*U+0308).last()");
1529            }
1530
1531            // test UNORM_NONE
1532            norm.setMode(Normalizer.NONE);
1533            if(norm.first()!=0x61 || norm.next()!=0x308 || norm.last()!=0x2f800) {
1534                errln("error in Normalizer(UNORM_NONE).first()/next()/last()");
1535            }
1536            out=Normalizer.normalize(s, Normalizer.NONE);
1537            if(!out.equals(s)) {
1538                errln("error in Normalizer::normalize(UNORM_NONE)");
1539            }
1540            ch = 0x1D15E;
1541            String exp = "\\U0001D157\\U0001D165";
1542            String ns = Normalizer.normalize(ch,Normalizer.NFC);
1543            if(!ns.equals(Utility.unescape(exp))){
1544                errln("error in Normalizer.normalize(int,Mode)");
1545            }
1546            ns = Normalizer.normalize(ch,Normalizer.NFC,0);
1547            if(!ns.equals(Utility.unescape(exp))){
1548                errln("error in Normalizer.normalize(int,Mode,int)");
1549            }
1550        }catch(Exception e){
1551            throw e;
1552        }
1553    }
1554
1555    @Test
1556    public void TestConcatenate() {
1557
1558        Object[][]cases=new Object[][]{
1559            /* mode, left, right, result */
1560            {
1561                Normalizer.NFC,
1562                "re",
1563                "\u0301sum\u00e9",
1564                "r\u00e9sum\u00e9"
1565            },
1566            {
1567                Normalizer.NFC,
1568                "a\u1100",
1569                "\u1161bcdefghijk",
1570                "a\uac00bcdefghijk"
1571            },
1572            /* ### TODO: add more interesting cases */
1573            {
1574                Normalizer.NFD,
1575                "\u03B1\u0345",
1576                "\u0C4D\uD804\uDCBA\uD834\uDD69",  // 0C4D 110BA 1D169
1577                "\u03B1\uD834\uDD69\uD804\uDCBA\u0C4D\u0345"  // 03B1 1D169 110BA 0C4D 0345
1578            }
1579        };
1580
1581        String left, right, expect, result;
1582        Normalizer.Mode mode;
1583        int i;
1584
1585        /* test concatenation */
1586        for(i=0; i<cases.length; ++i) {
1587            mode = (Normalizer.Mode)cases[i][0];
1588
1589            left=(String)cases[i][1];
1590            right=(String)cases[i][2];
1591            expect=(String)cases[i][3];
1592            {
1593                result=Normalizer.concatenate(left, right, mode,0);
1594                if(!result.equals(expect)) {
1595                    errln("error in Normalizer.concatenate(), cases[] failed"
1596                          +", result==expect: expected: "
1597                          + hex(expect)+" =========> got: " + hex(result));
1598                }
1599            }
1600            {
1601                result=Normalizer.concatenate(left.toCharArray(), right.toCharArray(), mode,0);
1602                if(!result.equals(expect)) {
1603                    errln("error in Normalizer.concatenate(), cases[] failed"
1604                          +", result==expect: expected: "
1605                          + hex(expect)+" =========> got: " + hex(result));
1606                }
1607            }
1608        }
1609
1610        mode= Normalizer.NFC; // (Normalizer.Mode)cases2[0][0];
1611        char[] destination = "My resume is here".toCharArray();
1612        left = "resume";
1613        right = "re\u0301sum\u00e9 is HERE";
1614        expect = "My r\u00e9sum\u00e9 is HERE";
1615
1616        // Concatenates 're' with '\u0301sum\u00e9 is HERE' and places the result at
1617        // position 3 of string 'My resume is here'.
1618        Normalizer.concatenate(left.toCharArray(), 0, 2, right.toCharArray(), 2, 15,
1619                                         destination, 3, 17, mode, 0);
1620        if(!String.valueOf(destination).equals(expect)) {
1621            errln("error in Normalizer.concatenate(), cases2[] failed"
1622                  +", result==expect: expected: "
1623                  + hex(expect) + " =========> got: " + hex(destination));
1624        }
1625
1626        // Error case when result of concatenation won't fit into destination array.
1627        try {
1628            Normalizer.concatenate(left.toCharArray(), 0, 2, right.toCharArray(), 2, 15,
1629                                         destination, 3, 16, mode, 0);
1630        } catch (IndexOutOfBoundsException e) {
1631            assertTrue("Normalizer.concatenate() failed", e.getMessage().equals("14"));
1632            return;
1633        }
1634        fail("Normalizer.concatenate() tested for failure but passed");
1635    }
1636
1637    private final int RAND_MAX = 0x7fff;
1638
1639    @Test
1640    public void TestCheckFCD()
1641    {
1642      char[] FAST = {0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
1643                     0x0008, 0x0009, 0x000A};
1644
1645      char[] FALSE = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
1646                      0x02B9, 0x0314, 0x0315, 0x0316};
1647
1648      char[] TRUE = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
1649                     0x0050, 0x0730, 0x09EE, 0x1E10};
1650
1651      char[][] datastr= { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
1652                          {0x0061, 0x030A, 0x00E2, 0x0323, 0},
1653                          {0x0061, 0x0323, 0x00E2, 0x0323, 0},
1654                          {0x0061, 0x0323, 0x1E05, 0x0302, 0}
1655                        };
1656      Normalizer.QuickCheckResult result[] = {Normalizer.YES, Normalizer.NO, Normalizer.NO, Normalizer.YES};
1657
1658      char[] datachar= {        0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
1659                                0x6a,
1660                                0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
1661                                0xea,
1662                                0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
1663                                0x0307, 0x0308, 0x0309, 0x030a,
1664                                0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
1665                                0x0327, 0x0328, 0x0329, 0x032a,
1666                                0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
1667                                0x1e07, 0x1e08, 0x1e09, 0x1e0a
1668                       };
1669
1670      int count = 0;
1671
1672      if (Normalizer.quickCheck(FAST,0,FAST.length, Normalizer.FCD,0) != Normalizer.YES)
1673        errln("Normalizer.quickCheck(FCD) failed: expected value for fast Normalizer.quickCheck is Normalizer.YES\n");
1674      if (Normalizer.quickCheck(FALSE,0, FALSE.length,Normalizer.FCD,0) != Normalizer.NO)
1675        errln("Normalizer.quickCheck(FCD) failed: expected value for error Normalizer.quickCheck is Normalizer.NO\n");
1676      if (Normalizer.quickCheck(TRUE,0,TRUE.length,Normalizer.FCD,0) != Normalizer.YES)
1677        errln("Normalizer.quickCheck(FCD) failed: expected value for correct Normalizer.quickCheck is Normalizer.YES\n");
1678
1679
1680      while (count < 4)
1681      {
1682        Normalizer.QuickCheckResult fcdresult = Normalizer.quickCheck(datastr[count],0,datastr[count].length, Normalizer.FCD,0);
1683        if (result[count] != fcdresult) {
1684            errln("Normalizer.quickCheck(FCD) failed: Data set "+ count
1685                    + " expected value "+ result[count]);
1686        }
1687        count ++;
1688      }
1689
1690      /* random checks of long strings */
1691      //srand((unsigned)time( NULL ));
1692      Random rand = createRandom(); // use test framework's random
1693
1694      for (count = 0; count < 50; count ++)
1695      {
1696        int size = 0;
1697        Normalizer.QuickCheckResult testresult = Normalizer.YES;
1698        char[] data= new char[20];
1699        char[] norm= new char[100];
1700        char[] nfd = new char[100];
1701        int normStart = 0;
1702        int nfdsize = 0;
1703        while (size != 19) {
1704          data[size] = datachar[rand.nextInt(RAND_MAX)*50/RAND_MAX];
1705          logln("0x"+data[size]);
1706          normStart += Normalizer.normalize(data,size,size+1,
1707                                              norm,normStart,100,
1708                                              Normalizer.NFD,0);
1709          size ++;
1710        }
1711        logln("\n");
1712
1713        nfdsize = Normalizer.normalize(data,0,size, nfd,0,nfd.length,Normalizer.NFD,0);
1714        //    nfdsize = unorm_normalize(data, size, UNORM_NFD, UCOL_IGNORE_HANGUL,
1715        //                      nfd, 100, &status);
1716        if (nfdsize != normStart || Utility.arrayRegionMatches(nfd,0, norm,0,nfdsize) ==false) {
1717          testresult = Normalizer.NO;
1718        }
1719        if (testresult == Normalizer.YES) {
1720          logln("result Normalizer.YES\n");
1721        }
1722        else {
1723          logln("result Normalizer.NO\n");
1724        }
1725
1726        if (Normalizer.quickCheck(data,0,data.length, Normalizer.FCD,0) != testresult) {
1727          errln("Normalizer.quickCheck(FCD) failed: expected "+ testresult +" for random data: "+hex(new String(data)) );
1728        }
1729      }
1730    }
1731
1732
1733    // reference implementation of Normalizer::compare
1734    private int ref_norm_compare(String s1, String s2, int options) {
1735        String t1, t2,r1,r2;
1736
1737        int normOptions=options>>Normalizer.COMPARE_NORM_OPTIONS_SHIFT;
1738
1739        if((options&Normalizer.COMPARE_IGNORE_CASE)!=0) {
1740            // NFD(toCasefold(NFD(X))) = NFD(toCasefold(NFD(Y)))
1741            r1 = Normalizer.decompose(s1,false,normOptions);
1742            r2 = Normalizer.decompose(s2,false,normOptions);
1743            r1 = UCharacter.foldCase(r1,options);
1744            r2 = UCharacter.foldCase(r2,options);
1745        }else{
1746            r1 = s1;
1747            r2 = s2;
1748        }
1749
1750        t1 = Normalizer.decompose(r1, false, normOptions);
1751        t2 = Normalizer.decompose(r2, false, normOptions);
1752
1753        if((options&Normalizer.COMPARE_CODE_POINT_ORDER)!=0) {
1754            UTF16.StringComparator comp
1755                    = new UTF16.StringComparator(true, false,
1756                                     UTF16.StringComparator.FOLD_CASE_DEFAULT);
1757            return comp.compare(t1,t2);
1758        } else {
1759            return t1.compareTo(t2);
1760        }
1761
1762    }
1763
1764    // test wrapper for Normalizer::compare, sets UNORM_INPUT_IS_FCD appropriately
1765    private int norm_compare(String s1, String s2, int options) {
1766        int normOptions=options>>Normalizer.COMPARE_NORM_OPTIONS_SHIFT;
1767
1768        if( Normalizer.YES==Normalizer.quickCheck(s1,Normalizer.FCD,normOptions) &&
1769            Normalizer.YES==Normalizer.quickCheck(s2,Normalizer.FCD,normOptions)) {
1770            options|=Normalizer.INPUT_IS_FCD;
1771        }
1772
1773        int cmpStrings = Normalizer.compare(s1, s2, options);
1774        int cmpArrays = Normalizer.compare(
1775                s1.toCharArray(), 0, s1.length(),
1776                s2.toCharArray(), 0, s2.length(), options);
1777        assertEquals("compare strings == compare char arrays", cmpStrings, cmpArrays);
1778        return cmpStrings;
1779    }
1780
1781    // reference implementation of UnicodeString::caseCompare
1782    private int ref_case_compare(String s1, String s2, int options) {
1783        String t1, t2;
1784
1785        t1=s1;
1786        t2=s2;
1787
1788        t1 = UCharacter.foldCase(t1,((options&Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I)==0));
1789        t2 = UCharacter.foldCase(t2,((options&Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I)==0));
1790
1791        if((options&Normalizer.COMPARE_CODE_POINT_ORDER)!=0) {
1792            UTF16.StringComparator comp
1793                    = new UTF16.StringComparator(true, false,
1794                                    UTF16.StringComparator.FOLD_CASE_DEFAULT);
1795            return comp.compare(t1,t2);
1796        } else {
1797            return t1.compareTo(t2);
1798        }
1799
1800    }
1801
1802    // reduce an integer to -1/0/1
1803    private static int sign(int value) {
1804        if(value==0) {
1805            return 0;
1806        } else {
1807            return (value>>31)|1;
1808        }
1809    }
1810    private static String signString(int value) {
1811        if(value<0) {
1812            return "<0";
1813        } else if(value==0) {
1814            return "=0";
1815        } else /* value>0 */ {
1816            return ">0";
1817        }
1818    }
    // Test Normalizer::compare and unorm_compare (thinly wrapped by the former)
    // by comparing the result with that of a semantically equivalent reference
    // implementation. Since we trust the pieces, this is sufficient.

    // Input table for the compare tests: each string is tested with itself and
    // with each other string, each time with all options.
    // The entries are passed through Utility.unescape() before use, which is why
    // some of them contain escape sequences with a doubled backslash.
    // The index ranges noted below matter: TestCompareDebug() picks entries by
    // position (42 and 43), so new entries should only be appended at the end.
    private  String strings[]=new String[]{
                // some cases from NormalizationTest.txt
                // 0..3
                "D\u031B\u0307\u0323",
                "\u1E0C\u031B\u0307",
                "D\u031B\u0323\u0307",
                "d\u031B\u0323\u0307",

                // 4..6
                "\u00E4",
                "a\u0308",
                "A\u0308",

                // Angstrom sign = A ring
                // 7..10
                "\u212B",
                "\u00C5",
                "A\u030A",
                "a\u030A",

                // 11..14 (entries 12 and 13 are identical)
                "a\u059A\u0316\u302A\u032Fb",
                "a\u302A\u0316\u032F\u059Ab",
                "a\u302A\u0316\u032F\u059Ab",
                "A\u059A\u0316\u302A\u032Fb",

                // from ICU case folding tests
                // 15..20
                "A\u00df\u00b5\ufb03\\U0001040c\u0131",
                "ass\u03bcffi\\U00010434i",
                "\u0061\u0042\u0131\u03a3\u00df\ufb03\ud93f\udfff",
                "\u0041\u0062\u0069\u03c3\u0073\u0053\u0046\u0066\u0049\ud93f\udfff",
                "\u0041\u0062\u0131\u03c3\u0053\u0073\u0066\u0046\u0069\ud93f\udfff",
                "\u0041\u0062\u0069\u03c3\u0073\u0053\u0046\u0066\u0049\ud93f\udffd",

                //     U+d800 U+10001   see implementation comment in unorm_cmpEquivFold
                // vs. U+10000          at bottom - code point order
                // 21..22
                "\ud800\ud800\udc01",
                "\ud800\udc00",

                // other code point order tests from ustrtest.cpp
                // 23..31
                "\u20ac\ud801",
                "\u20ac\ud800\udc00",
                "\ud800",
                "\ud800\uff61",
                "\udfff",
                "\uff61\udfff",
                "\uff61\ud800\udc02",
                "\ud800\udc02",
                "\ud84d\udc56",

                // long strings, see cnormtst.c/TestNormCoverage()
                // equivalent if case-insensitive
                // 32..33
                "\uAD8B\uAD8B\uAD8B\uAD8B"+
                "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "aaaaaaaaaaaaaaaaaazzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"+
                "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"+
                "ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"+
                "ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd"+
                "\uAD8B\uAD8B\uAD8B\uAD8B"+
                "d\u031B\u0307\u0323",

                "\u1100\u116f\u11aa\uAD8B\uAD8B\u1100\u116f\u11aa"+
                "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "aaaaaaaaaaAAAAAAAAZZZZZZZZZZZZZZZZzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"+
                "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"+
                "ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"+
                "ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd"+
                "\u1100\u116f\u11aa\uAD8B\uAD8B\u1100\u116f\u11aa"+
                "\u1E0C\u031B\u0307",

                // some strings that may make a difference whether the compare function
                // case-folds or decomposes first
                // 34..41
                "\u0360\u0345\u0334",
                "\u0360\u03b9\u0334",

                "\u0360\u1f80\u0334",
                "\u0360\u03b1\u0313\u03b9\u0334",

                "\u0360\u1ffc\u0334",
                "\u0360\u03c9\u03b9\u0334",

                "a\u0360\u0345\u0360\u0345b",
                "a\u0345\u0360\u0345\u0360b",

                // interesting cases for canonical caseless match with turkic i handling
                // 42..43
                "\u00cc",
                "\u0069\u0300",

                // strings with post-Unicode 3.2 normalization or normalization corrections
                // 44..45
                "\u00e4\u193b\\U0002f868",
                "\u0061\u193b\u0308\u36fc",


    };
1934
1935    // all combinations of options
1936    // UNORM_INPUT_IS_FCD is set automatically if both input strings fulfill FCD conditions
1937    final class Temp {
1938        int options;
1939        String name;
1940        public Temp(int opt,String str){
1941            options =opt;
1942            name = str;
1943        }
1944
1945    }
1946    // set UNORM_UNICODE_3_2 in one additional combination
1947
1948    private Temp[] opt = new Temp[]{
1949                    new Temp(0,"default"),
1950                    new Temp(Normalizer.COMPARE_CODE_POINT_ORDER, "code point order" ),
1951                    new Temp(Normalizer.COMPARE_IGNORE_CASE, "ignore case" ),
1952                    new Temp(Normalizer.COMPARE_CODE_POINT_ORDER|Normalizer.COMPARE_IGNORE_CASE, "code point order & ignore case" ),
1953                    new Temp(Normalizer.COMPARE_IGNORE_CASE|Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I, "ignore case & special i"),
1954                    new Temp(Normalizer.COMPARE_CODE_POINT_ORDER|Normalizer.COMPARE_IGNORE_CASE|Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I, "code point order & ignore case & special i"),
1955                    new Temp(Normalizer.UNICODE_3_2 << Normalizer.COMPARE_NORM_OPTIONS_SHIFT, "Unicode 3.2")
1956            };
1957
1958
1959    @Test
1960    public void TestCompareDebug(){
1961
1962        String[] s = new String[100]; // at least as many items as in strings[] !
1963
1964
1965        int i, j, k, count=strings.length;
1966        int result, refResult;
1967
1968        // create the UnicodeStrings
1969        for(i=0; i<count; ++i) {
1970            s[i]=Utility.unescape(strings[i]);
1971        }
1972        UTF16.StringComparator comp = new UTF16.StringComparator(true, false,
1973                                     UTF16.StringComparator.FOLD_CASE_DEFAULT);
1974        // test them each with each other
1975
1976        i = 42;
1977        j = 43;
1978        k = 2;
1979        // test Normalizer::compare
1980        result=norm_compare(s[i], s[j], opt[k].options);
1981        refResult=ref_norm_compare(s[i], s[j], opt[k].options);
1982        if(sign(result)!=sign(refResult)) {
1983            errln("Normalizer::compare( " + i +", "+j + ", " +k+"( " +opt[k].name+"))=" + result +" should be same sign as " + refResult);
1984        }
1985
1986        // test UnicodeString::caseCompare - same internal implementation function
1987         if(0!=(opt[k].options&Normalizer.COMPARE_IGNORE_CASE)) {
1988        //    result=s[i]. (s[j], opt[k].options);
1989            if ((opt[k].options & Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I) == 0)
1990            {
1991                comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_DEFAULT);
1992            }
1993            else {
1994                comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_EXCLUDE_SPECIAL_I);
1995            }
1996
1997            result=comp.compare(s[i],s[j]);
1998            refResult=ref_case_compare(s[i], s[j], opt[k].options);
1999            if(sign(result)!=sign(refResult)) {
2000                      errln("Normalizer::compare( " + i +", "+j + ", "+k+"( " +opt[k].name+"))=" + result +" should be same sign as " + refResult);
2001                            }
2002        }
2003        String value1 = "\u00dater\u00fd";
2004        String value2 = "\u00fater\u00fd";
2005        if(Normalizer.compare(value1,value2,0)!=0){
2006            if(Normalizer.compare(value1,value2,Normalizer.COMPARE_IGNORE_CASE)==0){
2007
2008            }
2009        }
2010    }
2011
2012    @Test
2013    public void TestCompare() {
2014
2015        String[] s = new String[100]; // at least as many items as in strings[] !
2016
2017        int i, j, k, count=strings.length;
2018        int result, refResult;
2019
2020        // create the UnicodeStrings
2021        for(i=0; i<count; ++i) {
2022            s[i]=Utility.unescape(strings[i]);
2023        }
2024        UTF16.StringComparator comp = new UTF16.StringComparator();
2025        // test them each with each other
2026        for(i=0; i<count; ++i) {
2027            for(j=i; j<count; ++j) {
2028                for(k=0; k<opt.length; ++k) {
2029                    // test Normalizer::compare
2030                    result=norm_compare(s[i], s[j], opt[k].options);
2031                    refResult=ref_norm_compare(s[i], s[j], opt[k].options);
2032                    if(sign(result)!=sign(refResult)) {
2033                        errln("Normalizer::compare( " + i +", "+j + ", " +k+"( " +opt[k].name+"))=" + result +" should be same sign as " + refResult);
2034                    }
2035
2036                    // test UnicodeString::caseCompare - same internal implementation function
2037                     if(0!=(opt[k].options&Normalizer.COMPARE_IGNORE_CASE)) {
2038                        //    result=s[i]. (s[j], opt[k].options);
2039                        if ((opt[k].options & Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I) == 0)
2040                        {
2041                            comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_DEFAULT);
2042                        }
2043                        else {
2044                            comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_EXCLUDE_SPECIAL_I);
2045                        }
2046
2047                        comp.setCodePointCompare((opt[k].options & Normalizer.COMPARE_CODE_POINT_ORDER) != 0);
2048                        // result=comp.caseCompare(s[i],s[j], opt[k].options);
2049                        result=comp.compare(s[i],s[j]);
2050                        refResult=ref_case_compare(s[i], s[j], opt[k].options);
2051                        if(sign(result)!=sign(refResult)) {
2052                                  errln("Normalizer::compare( " + i +", "+j + ", "+k+"( " +opt[k].name+"))=" + result +" should be same sign as " + refResult);
2053                                         }
2054                    }
2055                }
2056            }
2057        }
2058
2059        // test cases with i and I to make sure Turkic works
2060        char[] iI= new char[]{ 0x49, 0x69, 0x130, 0x131 };
2061        UnicodeSet set = new UnicodeSet(), iSet = new UnicodeSet();
2062        Normalizer2Impl nfcImpl = Norm2AllModes.getNFCInstance().impl;
2063        nfcImpl.ensureCanonIterData();
2064
2065        String s1, s2;
2066
2067        // collect all sets into one for contiguous output
2068        for(i=0; i<iI.length; ++i) {
2069            if(nfcImpl.getCanonStartSet(iI[i], iSet)) {
2070                set.addAll(iSet);
2071            }
2072        }
2073
2074        // test all of these precomposed characters
2075        Normalizer2 nfcNorm2 = Normalizer2.getNFCInstance();
2076        UnicodeSetIterator it = new UnicodeSetIterator(set);
2077        int c;
2078        while(it.next() && (c=it.codepoint)!=UnicodeSetIterator.IS_STRING) {
2079            s1 = UTF16.valueOf(c);
2080            s2 = nfcNorm2.getDecomposition(c);
2081            for(k=0; k<opt.length; ++k) {
2082                // test Normalizer::compare
2083
2084                result= norm_compare(s1, s2, opt[k].options);
2085                refResult=ref_norm_compare(s1, s2, opt[k].options);
2086                if(sign(result)!=sign(refResult)) {
2087                    errln("Normalizer.compare(U+"+hex(c)+" with its NFD, "+opt[k].name+")"
2088                          + signString(result)+" should be "+signString(refResult));
2089                }
2090
2091                // test UnicodeString::caseCompare - same internal implementation function
2092                if((opt[k].options & Normalizer.COMPARE_IGNORE_CASE)>0) {
2093                     if ((opt[k].options & Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I) == 0)
2094                    {
2095                        comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_DEFAULT);
2096                    }
2097                    else {
2098                        comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_EXCLUDE_SPECIAL_I);
2099                    }
2100
2101                    comp.setCodePointCompare((opt[k].options & Normalizer.COMPARE_CODE_POINT_ORDER) != 0);
2102
2103                    result=comp.compare(s1,s2);
2104                    refResult=ref_case_compare(s1, s2, opt[k].options);
2105                    if(sign(result)!=sign(refResult)) {
2106                        errln("UTF16.compare(U+"+hex(c)+" with its NFD, "
2107                              +opt[k].name+")"+signString(result) +" should be "+signString(refResult));
2108                    }
2109                }
2110            }
2111        }
2112
2113        // test getDecomposition() for some characters that do not decompose
2114        if( nfcNorm2.getDecomposition(0x20)!=null ||
2115            nfcNorm2.getDecomposition(0x4e00)!=null ||
2116            nfcNorm2.getDecomposition(0x20002)!=null
2117        ) {
2118            errln("NFC.getDecomposition() returns TRUE for characters which do not have decompositions");
2119        }
2120
2121        // test getRawDecomposition() for some characters that do not decompose
2122        if( nfcNorm2.getRawDecomposition(0x20)!=null ||
2123            nfcNorm2.getRawDecomposition(0x4e00)!=null ||
2124            nfcNorm2.getRawDecomposition(0x20002)!=null
2125        ) {
2126            errln("getRawDecomposition() returns TRUE for characters which do not have decompositions");
2127        }
2128
2129        // test composePair() for some pairs of characters that do not compose
2130        if( nfcNorm2.composePair(0x20, 0x301)>=0 ||
2131            nfcNorm2.composePair(0x61, 0x305)>=0 ||
2132            nfcNorm2.composePair(0x1100, 0x1160)>=0 ||
2133            nfcNorm2.composePair(0xac00, 0x11a7)>=0
2134        ) {
2135            errln("NFC.composePair() incorrectly composes some pairs of characters");
2136        }
2137
2138        // test FilteredNormalizer2.getDecomposition()
2139        UnicodeSet filter=new UnicodeSet("[^\u00a0-\u00ff]");
2140        FilteredNormalizer2 fn2=new FilteredNormalizer2(nfcNorm2, filter);
2141        if(fn2.getDecomposition(0xe4)!=null || !"A\u0304".equals(fn2.getDecomposition(0x100))) {
2142            errln("FilteredNormalizer2(NFC, ^A0-FF).getDecomposition() failed");
2143        }
2144
2145        // test FilteredNormalizer2.getRawDecomposition()
2146        if(fn2.getRawDecomposition(0xe4)!=null || !"A\u0304".equals(fn2.getRawDecomposition(0x100))) {
2147            errln("FilteredNormalizer2(NFC, ^A0-FF).getRawDecomposition() failed");
2148        }
2149
2150        // test FilteredNormalizer2::composePair()
2151        if( 0x100!=fn2.composePair(0x41, 0x304) ||
2152            fn2.composePair(0xc7, 0x301)>=0 // unfiltered result: U+1E08
2153        ) {
2154            errln("FilteredNormalizer2(NFC, ^A0-FF).composePair() failed");
2155        }
2156    }
2157
2158    // verify that case-folding does not un-FCD strings
2159    int countFoldFCDExceptions(int foldingOptions) {
2160        String s, d;
2161        int c;
2162        int count;
2163        int/*unsigned*/ cc, trailCC, foldCC, foldTrailCC;
2164        Normalizer.QuickCheckResult qcResult;
2165        int category;
2166        boolean isNFD;
2167
2168
2169        logln("Test if case folding may un-FCD a string (folding options 0x)"+hex(foldingOptions));
2170
2171        count=0;
2172        for(c=0; c<=0x10ffff; ++c) {
2173            category=UCharacter.getType(c);
2174            if(category==UCharacterCategory.UNASSIGNED) {
2175                continue; // skip unassigned code points
2176            }
2177            if(c==0xac00) {
2178                c=0xd7a3; // skip Hangul - no case folding there
2179                continue;
2180            }
2181            // skip Han blocks - no case folding there either
2182            if(c==0x3400) {
2183                c=0x4db5;
2184                continue;
2185            }
2186            if(c==0x4e00) {
2187                c=0x9fa5;
2188                continue;
2189            }
2190            if(c==0x20000) {
2191                c=0x2a6d6;
2192                continue;
2193            }
2194
2195            s= UTF16.valueOf(c);
2196
2197            // get leading and trailing cc for c
2198            d= Normalizer.decompose(s,false);
2199            isNFD= s==d;
2200            cc=UCharacter.getCombiningClass(UTF16.charAt(d,0));
2201            trailCC=UCharacter.getCombiningClass(UTF16.charAt(d,d.length()-1));
2202
2203            // get leading and trailing cc for the case-folding of c
2204            UCharacter.foldCase(s,(foldingOptions==0));
2205            d = Normalizer.decompose(s, false);
2206            foldCC=UCharacter.getCombiningClass(UTF16.charAt(d,0));
2207            foldTrailCC=UCharacter.getCombiningClass(UTF16.charAt(d,d.length()-1));
2208
2209            qcResult=Normalizer.quickCheck(s, Normalizer.FCD,0);
2210
2211
2212            // bad:
2213            // - character maps to empty string: adjacent characters may then need reordering
2214            // - folding has different leading/trailing cc's, and they don't become just 0
2215            // - folding itself is not FCD
2216            if( qcResult!=Normalizer.YES ||
2217                s.length()==0 ||
2218                (cc!=foldCC && foldCC!=0) || (trailCC!=foldTrailCC && foldTrailCC!=0)
2219            ) {
2220                ++count;
2221                errln("U+"+hex(c)+": case-folding may un-FCD a string (folding options 0x"+hex(foldingOptions)+")");
2222                //errln("  cc %02x trailCC %02x    foldCC(U+%04lx) %02x foldTrailCC(U+%04lx) %02x   quickCheck(folded)=%d", cc, trailCC, UTF16.charAt(d,0), foldCC, UTF16.charAt(d,d.length()-1), foldTrailCC, qcResult);
2223                continue;
2224            }
2225
2226            // also bad:
2227            // if a code point is in NFD but its case folding is not, then
2228            // unorm_compare will also fail
2229            if(isNFD && Normalizer.YES!=Normalizer.quickCheck(s, Normalizer.NFD,0)) {
2230                ++count;
2231                errln("U+"+hex(c)+": case-folding may un-FCD a string (folding options 0x"+hex(foldingOptions)+")");
2232            }
2233        }
2234
2235        logln("There are "+hex(count)+" code points for which case-folding may un-FCD a string (folding options"+foldingOptions+"x)" );
2236        return count;
2237    }
2238
2239    @Test
2240    public void TestFindFoldFCDExceptions() {
2241        int count;
2242
2243        count=countFoldFCDExceptions(0);
2244        count+=countFoldFCDExceptions(Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I);
2245        if(count>0) {
2246            //*
2247            //* If case-folding un-FCDs any strings, then unorm_compare() must be
2248            //* re-implemented.
2249            //* It currently assumes that one can check for FCD then case-fold
2250            //* and then still have FCD strings for raw decomposition without reordering.
2251            //*
2252            errln("error: There are "+count+" code points for which case-folding"+
2253                  " may un-FCD a string for all folding options.\n See comment"+
2254                  " in BasicNormalizerTest::FindFoldFCDExceptions()!");
2255        }
2256    }
2257
2258    @Test
2259    public void TestCombiningMarks(){
2260        String src = "\u0f71\u0f72\u0f73\u0f74\u0f75";
2261        String expected = "\u0F71\u0F71\u0F71\u0F72\u0F72\u0F74\u0F74";
2262        String result = Normalizer.decompose(src,false);
2263        if(!expected.equals(result)){
2264            errln("Reordering of combining marks failed. Expected: "+Utility.hex(expected)+" Got: "+ Utility.hex(result));
2265        }
2266    }
2267
2268    /*
2269     * Re-enable this test when UTC fixes UAX 21
2270    @Test
2271    public void TestUAX21Failure(){
2272        final String[][] cases = new String[][]{
2273                {"\u0061\u0345\u0360\u0345\u0062", "\u0061\u0360\u0345\u0345\u0062"},
2274                {"\u0061\u0345\u0345\u0360\u0062", "\u0061\u0360\u0345\u0345\u0062"},
2275                {"\u0061\u0345\u0360\u0362\u0360\u0062", "\u0061\u0362\u0360\u0360\u0345\u0062"},
2276                {"\u0061\u0360\u0345\u0360\u0362\u0062", "\u0061\u0362\u0360\u0360\u0345\u0062"},
2277                {"\u0061\u0345\u0360\u0362\u0361\u0062", "\u0061\u0362\u0360\u0361\u0345\u0062"},
2278                {"\u0061\u0361\u0345\u0360\u0362\u0062", "\u0061\u0362\u0361\u0360\u0345\u0062"},
2279        };
2280        for(int i = 0; i< cases.length; i++){
2281            String s1 =cases[0][0];
2282            String s2 = cases[0][1];
2283            if( (Normalizer.compare(s1,s2,Normalizer.FOLD_CASE_DEFAULT ==0)//case sensitive compare
2284                &&
2285                (Normalizer.compare(s1,s2,Normalizer.COMPARE_IGNORE_CASE)!=0)){
2286                errln("Normalizer.compare() failed for s1: "
2287                        + Utility.hex(s1) +" s2: " + Utility.hex(s2));
2288            }
2289        }
2290    }
2291    */
2292    @Test
2293    public void TestFCNFKCClosure() {
2294        final class TestStruct{
2295            int c;
2296            String s;
2297            TestStruct(int cp, String src){
2298                c=cp;
2299                s=src;
2300            }
2301        }
2302
2303        TestStruct[] tests= new TestStruct[]{
2304            new TestStruct( 0x00C4, "" ),
2305            new TestStruct( 0x00E4, "" ),
2306            new TestStruct( 0x037A, "\u0020\u03B9" ),
2307            new TestStruct( 0x03D2, "\u03C5" ),
2308            new TestStruct( 0x20A8, "\u0072\u0073" ) ,
2309            new TestStruct( 0x210B, "\u0068" ),
2310            new TestStruct( 0x210C, "\u0068" ),
2311            new TestStruct( 0x2121, "\u0074\u0065\u006C" ),
2312            new TestStruct( 0x2122, "\u0074\u006D" ),
2313            new TestStruct( 0x2128, "\u007A" ),
2314            new TestStruct( 0x1D5DB,"\u0068" ),
2315            new TestStruct( 0x1D5ED,"\u007A" ),
2316            new TestStruct( 0x0061, "" )
2317        };
2318
2319
2320        for(int i = 0; i < tests.length; ++ i) {
2321            String result=Normalizer.getFC_NFKC_Closure(tests[i].c);
2322            if(!result.equals(new String(tests[i].s))) {
2323                errln("getFC_NFKC_Closure(U+"+Integer.toHexString(tests[i].c)+") is wrong");
2324            }
2325        }
2326
2327        /* error handling */
2328
2329        int length=Normalizer.getFC_NFKC_Closure(0x5c, null);
2330        if(length!=0){
2331            errln("getFC_NFKC_Closure did not perform error handling correctly");
2332        }
2333    }
2334    @Test
2335    public void TestBugJ2324(){
2336       /* String[] input = new String[]{
2337                            //"\u30FD\u3099",
2338                            "\u30FA\u309A",
2339                            "\u30FB\u309A",
2340                            "\u30FC\u309A",
2341                            "\u30FE\u309A",
2342                            "\u30FD\u309A",
2343
2344        };*/
2345        String troublesome = "\u309A";
2346        for(int i=0x3000; i<0x3100;i++){
2347            String input = ((char)i)+troublesome;
2348            try{
2349              /*  String result =*/ Normalizer.compose(input,false);
2350            }catch(IndexOutOfBoundsException e){
2351                errln("compose() failed for input: " + Utility.hex(input) + " Exception: " + e.toString());
2352            }
2353        }
2354
2355    }
2356
2357    static final int D = 0, C = 1, KD= 2, KC = 3, FCD=4, NONE=5;
2358
2359    private static UnicodeSet[] initSkippables(UnicodeSet[] skipSets) {
2360        skipSets[D].applyPattern("[[:NFD_QC=Yes:]&[:ccc=0:]]", false);
2361        skipSets[C].applyPattern("[[:NFC_QC=Yes:]&[:ccc=0:]-[:HST=LV:]]", false);
2362        skipSets[KD].applyPattern("[[:NFKD_QC=Yes:]&[:ccc=0:]]", false);
2363        skipSets[KC].applyPattern("[[:NFKC_QC=Yes:]&[:ccc=0:]-[:HST=LV:]]", false);
2364
2365        // Remove from the NFC and NFKC sets all those characters that change
2366        // when a back-combining character is added.
2367        // First, get all of the back-combining characters and their combining classes.
2368        UnicodeSet combineBack=new UnicodeSet("[:NFC_QC=Maybe:]");
2369        int numCombineBack=combineBack.size();
2370        int[] combineBackCharsAndCc=new int[numCombineBack*2];
2371        UnicodeSetIterator iter=new UnicodeSetIterator(combineBack);
2372        for(int i=0; i<numCombineBack; ++i) {
2373            iter.next();
2374            int c=iter.codepoint;
2375            combineBackCharsAndCc[2*i]=c;
2376            combineBackCharsAndCc[2*i+1]=UCharacter.getCombiningClass(c);
2377        }
2378
2379        // We need not look at control codes, Han characters nor Hangul LVT syllables because they
2380        // do not combine forward. LV syllables are already removed.
2381        UnicodeSet notInteresting=new UnicodeSet("[[:C:][:Unified_Ideograph:][:HST=LVT:]]");
2382        UnicodeSet unsure=((UnicodeSet)(skipSets[C].clone())).removeAll(notInteresting);
2383        // System.out.format("unsure.size()=%d\n", unsure.size());
2384
2385        // For each character about which we are unsure, see if it changes when we add
2386        // one of the back-combining characters.
2387        Normalizer2 norm2=Normalizer2.getNFCInstance();
2388        StringBuilder s=new StringBuilder();
2389        iter.reset(unsure);
2390        while(iter.next()) {
2391            int c=iter.codepoint;
2392            s.delete(0, 0x7fffffff).appendCodePoint(c);
2393            int cLength=s.length();
2394            int tccc=UCharacter.getIntPropertyValue(c, UProperty.TRAIL_CANONICAL_COMBINING_CLASS);
2395            for(int i=0; i<numCombineBack; ++i) {
2396                // If c's decomposition ends with a character with non-zero combining class, then
2397                // c can only change if it combines with a character with a non-zero combining class.
2398                int cc2=combineBackCharsAndCc[2*i+1];
2399                if(tccc==0 || cc2!=0) {
2400                    int c2=combineBackCharsAndCc[2*i];
2401                    s.appendCodePoint(c2);
2402                    if(!norm2.isNormalized(s)) {
2403                        // System.out.format("remove U+%04x (tccc=%d) + U+%04x (cc=%d)\n", c, tccc, c2, cc2);
2404                        skipSets[C].remove(c);
2405                        skipSets[KC].remove(c);
2406                        break;
2407                    }
2408                    s.delete(cLength, 0x7fffffff);
2409                }
2410            }
2411        }
2412        return skipSets;
2413    }
2414
2415    @Test
2416    public void TestSkippable() {
2417        UnicodeSet[] skipSets = new UnicodeSet[] {
2418            new UnicodeSet(), //NFD
2419            new UnicodeSet(), //NFC
2420            new UnicodeSet(), //NFKD
2421            new UnicodeSet()  //NFKC
2422        };
2423        UnicodeSet[] expectSets = new UnicodeSet[] {
2424            new UnicodeSet(),
2425            new UnicodeSet(),
2426            new UnicodeSet(),
2427            new UnicodeSet()
2428        };
2429        StringBuilder s, pattern;
2430
2431        // build NF*Skippable sets from runtime data
2432        skipSets[D].applyPattern("[:NFD_Inert:]");
2433        skipSets[C].applyPattern("[:NFC_Inert:]");
2434        skipSets[KD].applyPattern("[:NFKD_Inert:]");
2435        skipSets[KC].applyPattern("[:NFKC_Inert:]");
2436
2437        expectSets = initSkippables(expectSets);
2438        if(expectSets[D].contains(0x0350)){
2439            errln("expectSets[D] contains 0x0350");
2440        }
2441        for(int i=0; i<expectSets.length; ++i) {
2442            if(!skipSets[i].equals(expectSets[i])) {
2443                errln("error: TestSkippable skipSets["+i+"]!=expectedSets["+i+"]\n");
2444                // Note: This used to depend on hardcoded UnicodeSet patterns generated by
2445                // Mark's unicodetools.com.ibm.text.UCD.NFSkippable, by
2446                // running com.ibm.text.UCD.Main with the option NFSkippable.
2447                // Since ICU 4.6/Unicode 6, we are generating the
2448                // expectSets ourselves in initSkippables().
2449
2450                s=new StringBuilder();
2451
2452                s.append("\n\nskip=       ");
2453                s.append(skipSets[i].toPattern(true));
2454                s.append("\n\n");
2455
2456                s.append("skip-expect=");
2457                pattern = new StringBuilder(((UnicodeSet)skipSets[i].clone()).removeAll(expectSets[i]).toPattern(true));
2458                s.append(pattern);
2459
2460                pattern.delete(0,pattern.length());
2461                s.append("\n\nexpect-skip=");
2462                pattern = new StringBuilder(((UnicodeSet)expectSets[i].clone()).removeAll(skipSets[i]).toPattern(true));
2463                s.append(pattern);
2464                s.append("\n\n");
2465
2466                pattern.delete(0,pattern.length());
2467                s.append("\n\nintersection(expect,skip)=");
2468                UnicodeSet intersection  = ((UnicodeSet) expectSets[i].clone()).retainAll(skipSets[i]);
2469                pattern = new StringBuilder(intersection.toPattern(true));
2470                s.append(pattern);
2471                // Special: test coverage for append(char).
2472                s.append('\n');
2473                s.append('\n');
2474
2475                errln(s.toString());
2476            }
2477        }
2478    }
2479
2480    @Test
2481    public void TestBugJ2068(){
2482        String sample = "The quick brown fox jumped over the lazy dog";
2483        UCharacterIterator text = UCharacterIterator.getInstance(sample);
2484        Normalizer norm = new Normalizer(text,Normalizer.NFC,0);
2485        text.setIndex(4);
2486        if(text.current() == norm.current()){
2487            errln("Normalizer is not cloning the UCharacterIterator");
2488        }
2489     }
2490    @Test
2491     public void TestGetCombiningClass(){
2492        for(int i=0;i<0x10FFFF;i++){
2493            int cc = UCharacter.getCombiningClass(i);
2494            if(0xD800<= i && i<=0xDFFF && cc >0 ){
2495                cc = UCharacter.getCombiningClass(i);
2496                errln("CC: "+ cc + " for codepoint: " +Utility.hex(i,8));
2497            }
2498        }
2499    }
2500
2501    @Test
2502    public void TestSerializedSet(){
2503        USerializedSet sset=new USerializedSet();
2504        UnicodeSet set = new UnicodeSet();
2505        int start, end;
2506
2507        char[] serialized = {
2508            0x8007,  // length
2509            3,  // bmpLength
2510            0xc0, 0xfe, 0xfffc,
2511            1, 9, 0x10, 0xfffc
2512        };
2513        sset.getSet(serialized, 0);
2514
2515        // collect all sets into one for contiguous output
2516        int[] startEnd = new int[2];
2517        int count=sset.countRanges();
2518        for(int j=0; j<count; ++j) {
2519            sset.getRange(j, startEnd);
2520            set.add(startEnd[0], startEnd[1]);
2521        }
2522
2523        // test all of these characters
2524        UnicodeSetIterator it = new UnicodeSetIterator(set);
2525        while(it.nextRange() && it.codepoint!=UnicodeSetIterator.IS_STRING) {
2526            start=it.codepoint;
2527            end=it.codepointEnd;
2528            while(start<=end) {
2529                if(!sset.contains(start)){
2530                    errln("USerializedSet.contains failed for "+Utility.hex(start,8));
2531                }
2532                ++start;
2533            }
2534        }
2535    }
2536
2537    @Test
2538    public void TestReturnFailure(){
2539        char[] term = {'r','\u00e9','s','u','m','\u00e9' };
2540        char[] decomposed_term = new char[10 + term.length + 2];
2541        int rc = Normalizer.decompose(term,0,term.length, decomposed_term,0,decomposed_term.length,true, 0);
2542        int rc1 = Normalizer.decompose(term,0,term.length, decomposed_term,10,decomposed_term.length,true, 0);
2543        if(rc!=rc1){
2544            errln("Normalizer decompose did not return correct length");
2545        }
2546    }
2547
2548    private final static class TestCompositionCase {
2549        public Normalizer.Mode mode;
2550        public int options;
2551        public String input, expect;
2552        TestCompositionCase(Normalizer.Mode mode, int options, String input, String expect) {
2553            this.mode=mode;
2554            this.options=options;
2555            this.input=input;
2556            this.expect=expect;
2557        }
2558    }
2559
2560    @Test
2561    public void TestComposition() {
2562        final TestCompositionCase cases[]=new TestCompositionCase[]{
2563            /*
2564             * special cases for UAX #15 bug
2565             * see Unicode Corrigendum #5: Normalization Idempotency
2566             * at http://unicode.org/versions/corrigendum5.html
2567             * (was Public Review Issue #29)
2568             */
2569            new TestCompositionCase(Normalizer.NFC, 0, "\u1100\u0300\u1161\u0327",      "\u1100\u0300\u1161\u0327"),
2570            new TestCompositionCase(Normalizer.NFC, 0, "\u1100\u0300\u1161\u0327\u11a8","\u1100\u0300\u1161\u0327\u11a8"),
2571            new TestCompositionCase(Normalizer.NFC, 0, "\uac00\u0300\u0327\u11a8",      "\uac00\u0327\u0300\u11a8"),
2572            new TestCompositionCase(Normalizer.NFC, 0, "\u0b47\u0300\u0b3e",            "\u0b47\u0300\u0b3e"),
2573
2574            /* TODO: add test cases for UNORM_FCC here (j2151) */
2575        };
2576
2577        String output;
2578        int i;
2579
2580        for(i=0; i<cases.length; ++i) {
2581            output=Normalizer.normalize(cases[i].input, cases[i].mode, cases[i].options);
2582            if(!output.equals(cases[i].expect)) {
2583                errln("unexpected result for case "+i);
2584            }
2585        }
2586    }
2587
2588    @Test
2589    public void TestGetDecomposition() {
2590        Normalizer2 n2=Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.COMPOSE_CONTIGUOUS);
2591        String decomp=n2.getDecomposition(0x20);
2592        assertEquals("fcc.getDecomposition(space) failed", null, decomp);
2593        decomp=n2.getDecomposition(0xe4);
2594        assertEquals("fcc.getDecomposition(a-umlaut) failed", "a\u0308", decomp);
2595        decomp=n2.getDecomposition(0xac01);
2596        assertEquals("fcc.getDecomposition(Hangul syllable U+AC01) failed", "\u1100\u1161\u11a8", decomp);
2597    }
2598
2599    @Test
2600    public void TestGetRawDecomposition() {
2601        Normalizer2 n2=Normalizer2.getNFKCInstance();
2602        /*
2603         * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
2604         * without recursive decomposition.
2605         */
2606
2607        String decomp=n2.getRawDecomposition(0x20);
2608        assertEquals("nfkc.getRawDecomposition(space) failed", null, decomp);
2609        decomp=n2.getRawDecomposition(0xe4);
2610        assertEquals("nfkc.getRawDecomposition(a-umlaut) failed", "a\u0308", decomp);
2611        /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
2612        decomp=n2.getRawDecomposition(0x1e08);
2613        assertEquals("nfkc.getRawDecomposition(c-cedilla-acute) failed", "\u00c7\u0301", decomp);
2614        /* U+212B ANGSTROM SIGN */
2615        decomp=n2.getRawDecomposition(0x212b);
2616        assertEquals("nfkc.getRawDecomposition(angstrom sign) failed", "\u00c5", decomp);
2617        decomp=n2.getRawDecomposition(0xac00);
2618        assertEquals("nfkc.getRawDecomposition(Hangul syllable U+AC00) failed", "\u1100\u1161", decomp);
2619        /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */
2620        decomp=n2.getRawDecomposition(0xac01);
2621        assertEquals("nfkc.getRawDecomposition(Hangul syllable U+AC01) failed", "\uac00\u11a8", decomp);
2622    }
2623
2624    @Test
2625    public void TestCustomComp() {
2626        String [][] pairs={
2627            { "\\uD801\\uE000\\uDFFE", "" },
2628            { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD7FF\\uFFFF" },
2629            { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD7FF\\U000107FE\\uFFFF" },
2630            { "\\uE001\\U000110B9\\u0345\\u0308\\u0327", "\\uE002\\U000110B9\\u0327\\u0345" },
2631            { "\\uE010\\U000F0011\\uE012", "\\uE011\\uE012" },
2632            { "\\uE010\\U000F0011\\U000F0011\\uE012", "\\uE011\\U000F0010" },
2633            { "\\uE111\\u1161\\uE112\\u1162", "\\uAE4C\\u1102\\u0062\\u1162" },
2634            { "\\uFFF3\\uFFF7\\U00010036\\U00010077", "\\U00010037\\U00010037\\uFFF6\\U00010037" }
2635        };
2636        Normalizer2 customNorm2;
2637        customNorm2=
2638            Normalizer2.getInstance(
2639                BasicTest.class.getResourceAsStream("/com/ibm/icu/dev/data/testdata/testnorm.nrm"),
2640                "testnorm",
2641                Normalizer2.Mode.COMPOSE);
2642        for(int i=0; i<pairs.length; ++i) {
2643            String[] pair=pairs[i];
2644            String input=Utility.unescape(pair[0]);
2645            String expected=Utility.unescape(pair[1]);
2646            String result=customNorm2.normalize(input);
2647            if(!result.equals(expected)) {
2648                errln("custom compose Normalizer2 did not normalize input "+i+" as expected");
2649            }
2650        }
2651    }
2652
2653    @Test
2654    public void TestCustomFCC() {
2655        String[][] pairs={
2656            { "\\uD801\\uE000\\uDFFE", "" },
2657            { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD7FF\\uFFFF" },
2658            { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD7FF\\U000107FE\\uFFFF" },
2659            // The following expected result is different from CustomComp
2660            // because of only-contiguous composition.
2661            { "\\uE001\\U000110B9\\u0345\\u0308\\u0327", "\\uE001\\U000110B9\\u0327\\u0308\\u0345" },
2662            { "\\uE010\\U000F0011\\uE012", "\\uE011\\uE012" },
2663            { "\\uE010\\U000F0011\\U000F0011\\uE012", "\\uE011\\U000F0010" },
2664            { "\\uE111\\u1161\\uE112\\u1162", "\\uAE4C\\u1102\\u0062\\u1162" },
2665            { "\\uFFF3\\uFFF7\\U00010036\\U00010077", "\\U00010037\\U00010037\\uFFF6\\U00010037" }
2666        };
2667        Normalizer2 customNorm2;
2668        customNorm2=
2669            Normalizer2.getInstance(
2670                BasicTest.class.getResourceAsStream("/com/ibm/icu/dev/data/testdata/testnorm.nrm"),
2671                "testnorm",
2672                Normalizer2.Mode.COMPOSE_CONTIGUOUS);
2673        for(int i=0; i<pairs.length; ++i) {
2674            String[] pair=pairs[i];
2675            String input=Utility.unescape(pair[0]);
2676            String expected=Utility.unescape(pair[1]);
2677            String result=customNorm2.normalize(input);
2678            if(!result.equals(expected)) {
2679                errln("custom FCC Normalizer2 did not normalize input "+i+" as expected");
2680            }
2681        }
2682    }
2683
2684    @Test
2685    public void TestCanonIterData() {
2686        // For now, just a regression test.
2687        Normalizer2Impl impl=Norm2AllModes.getNFCInstance().impl.ensureCanonIterData();
2688        // U+0FB5 TIBETAN SUBJOINED LETTER SSA is the trailing character
2689        // in some decomposition mappings where there is a composition exclusion.
2690        // In fact, U+0FB5 is normalization-inert (NFC_QC=Yes, NFD_QC=Yes, ccc=0)
2691        // but it is not a segment starter because it occurs in a decomposition mapping.
2692        if(impl.isCanonSegmentStarter(0xfb5)) {
2693            errln("isCanonSegmentStarter(U+0fb5)=true is wrong");
2694        }
2695        // For [:Segment_Starter:] to work right, not just the property function has to work right,
2696        // UnicodeSet also needs a correct range starts set.
2697        UnicodeSet segStarters=new UnicodeSet("[:Segment_Starter:]").freeze();
2698        if(segStarters.contains(0xfb5)) {
2699            errln("[:Segment_Starter:].contains(U+0fb5)=true is wrong");
2700        }
2701        // Try characters up to Kana and miscellaneous CJK but below Han (for expediency).
2702        for(int c=0; c<=0x33ff; ++c) {
2703            boolean isStarter=impl.isCanonSegmentStarter(c);
2704            boolean isContained=segStarters.contains(c);
2705            if(isStarter!=isContained) {
2706                errln(String.format(
2707                        "discrepancy: isCanonSegmentStarter(U+%04x)=%5b != " +
2708                        "[:Segment_Starter:].contains(same)",
2709                        c, isStarter));
2710            }
2711        }
2712    }
2713
2714    @Test
2715    public void TestFilteredNormalizer2() {
2716        Normalizer2 nfcNorm2=Normalizer2.getNFCInstance();
2717        UnicodeSet filter=new UnicodeSet("[^\u00a0-\u00ff\u0310-\u031f]");
2718        FilteredNormalizer2 fn2=new FilteredNormalizer2(nfcNorm2, filter);
2719        int c;
2720        for(c=0; c<=0x3ff; ++c) {
2721            int expectedCC= filter.contains(c) ? nfcNorm2.getCombiningClass(c) : 0;
2722            int cc=fn2.getCombiningClass(c);
2723            assertEquals(
2724                    "FilteredNormalizer2(NFC, ^A0-FF,310-31F).getCombiningClass(U+"+hex(c)+
2725                    ")==filtered NFC.getCC()",
2726                    expectedCC, cc);
2727        }
2728
2729        // More coverage.
2730        StringBuilder sb=new StringBuilder();
2731        assertEquals("filtered normalize()", "ää\u0304",
2732                fn2.normalize("a\u0308ä\u0304", (Appendable)sb).toString());
2733        assertTrue("filtered hasBoundaryAfter()", fn2.hasBoundaryAfter('ä'));
2734        assertTrue("filtered isInert()", fn2.isInert(0x0313));
2735    }
2736
2737    @Test
2738    public void TestFilteredAppend() {
2739        Normalizer2 nfcNorm2=Normalizer2.getNFCInstance();
2740        UnicodeSet filter=new UnicodeSet("[^\u00a0-\u00ff\u0310-\u031f]");
2741        FilteredNormalizer2 fn2=new FilteredNormalizer2(nfcNorm2, filter);
2742
2743        // Append two strings that each contain a character outside the filter set.
2744        StringBuilder sb = new StringBuilder("a\u0313a");
2745        String second = "\u0301\u0313";
2746        assertEquals("append()", "a\u0313á\u0313", fn2.append(sb, second).toString());
2747
2748        // Same, and also normalize the second string.
2749        sb.replace(0, 0x7fffffff, "a\u0313a");
2750        assertEquals(
2751            "normalizeSecondAndAppend()",
2752            "a\u0313á\u0313", fn2.normalizeSecondAndAppend(sb, second).toString());
2753
2754        // Normalizer2.normalize(String) uses spanQuickCheckYes() and normalizeSecondAndAppend().
2755        assertEquals("normalize()", "a\u0313á\u0313", fn2.normalize("a\u0313a\u0301\u0313"));
2756    }
2757
2758    @Test
2759    public void TestGetEasyToUseInstance() {
2760        // Test input string:
2761        // U+00A0 -> <noBreak> 0020
2762        // U+00C7 0301 = 1E08 = 0043 0327 0301
2763        String in="\u00A0\u00C7\u0301";
2764        Normalizer2 n2=Normalizer2.getNFCInstance();
2765        String out=n2.normalize(in);
2766        assertEquals(
2767                "getNFCInstance() did not return an NFC instance " +
2768                "(normalizes to " + prettify(out) + ')',
2769                "\u00A0\u1E08", out);
2770
2771        n2=Normalizer2.getNFDInstance();
2772        out=n2.normalize(in);
2773        assertEquals(
2774                "getNFDInstance() did not return an NFD instance " +
2775                "(normalizes to " + prettify(out) + ')',
2776                "\u00A0C\u0327\u0301", out);
2777
2778        n2=Normalizer2.getNFKCInstance();
2779        out=n2.normalize(in);
2780        assertEquals(
2781                "getNFKCInstance() did not return an NFKC instance " +
2782                "(normalizes to " + prettify(out) + ')',
2783                " \u1E08", out);
2784
2785        n2=Normalizer2.getNFKDInstance();
2786        out=n2.normalize(in);
2787        assertEquals(
2788                "getNFKDInstance() did not return an NFKD instance " +
2789                "(normalizes to " + prettify(out) + ')',
2790                " C\u0327\u0301", out);
2791
2792        n2=Normalizer2.getNFKCCasefoldInstance();
2793        out=n2.normalize(in);
2794        assertEquals(
2795                "getNFKCCasefoldInstance() did not return an NFKC_Casefold instance " +
2796                "(normalizes to " + prettify(out) + ')',
2797                " \u1E09", out);
2798    }
2799
2800    @Test
2801    public void TestNFC() {
2802        // Coverage tests.
2803        Normalizer2 nfc = Normalizer2.getNFCInstance();
2804        assertTrue("nfc.hasBoundaryAfter(space)", nfc.hasBoundaryAfter(' '));
2805        assertFalse("nfc.hasBoundaryAfter(ä)", nfc.hasBoundaryAfter('ä'));
2806    }
2807
2808    @Test
2809    public void TestNFD() {
2810        // Coverage tests.
2811        Normalizer2 nfd = Normalizer2.getNFDInstance();
2812        assertTrue("nfd.hasBoundaryAfter(space)", nfd.hasBoundaryAfter(' '));
2813        assertFalse("nfd.hasBoundaryAfter(ä)", nfd.hasBoundaryAfter('ä'));
2814    }
2815
2816    @Test
2817    public void TestFCD() {
2818        // Coverage tests.
2819        Normalizer2 fcd = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.FCD);
2820        assertTrue("fcd.hasBoundaryAfter(space)", fcd.hasBoundaryAfter(' '));
2821        assertFalse("fcd.hasBoundaryAfter(ä)", fcd.hasBoundaryAfter('ä'));
2822        assertTrue("fcd.isInert(space)", fcd.isInert(' '));
2823        assertFalse("fcd.isInert(ä)", fcd.isInert('ä'));
2824
2825        // This implementation method is unreachable via public API.
2826        Norm2AllModes.FCDNormalizer2 impl = (Norm2AllModes.FCDNormalizer2)fcd;
2827        assertEquals("fcd impl.getQuickCheck(space)", 1, impl.getQuickCheck(' '));
2828        assertEquals("fcd impl.getQuickCheck(ä)", 0, impl.getQuickCheck('ä'));
2829    }
2830
2831    @Test
2832    public void TestNoneNormalizer() {
2833        // Use the deprecated Mode Normalizer.NONE for coverage of the internal NoopNormalizer2
2834        // as far as its methods are reachable that way.
2835        assertEquals("NONE.concatenate()", "ä\u0327",
2836                Normalizer.concatenate("ä", "\u0327", Normalizer.NONE, 0));
2837        assertTrue("NONE.isNormalized()", Normalizer.isNormalized("ä\u0327", Normalizer.NONE, 0));
2838    }
2839
2840    @Test
2841    public void TestNoopNormalizer2() {
2842        // Use the internal class directly for coverage of methods that are not publicly reachable.
2843        Normalizer2 noop = Norm2AllModes.NOOP_NORMALIZER2;
2844        assertEquals("noop.normalizeSecondAndAppend()", "ä\u0327",
2845                noop.normalizeSecondAndAppend(new StringBuilder("ä"), "\u0327").toString());
2846        assertEquals("noop.getDecomposition()", null, noop.getDecomposition('ä'));
2847        assertTrue("noop.hasBoundaryAfter()", noop.hasBoundaryAfter(0x0308));
2848        assertTrue("noop.isInert()", noop.isInert(0x0308));
2849    }
2850
2851    /*
2852     * This unit test covers two 'get' methods in class Normalizer2Impl. It only tests that
2853     * an object is returned.
2854     */
2855    @Test
2856    public void TestGetsFromImpl() {
2857       Normalizer2Impl nfcImpl = Norm2AllModes.getNFCInstance().impl;
2858       assertNotEquals("getNormTrie() returns null", null, nfcImpl.getNormTrie());
2859       assertNotEquals("getFCD16FromBelow180() returns null", null,
2860                       nfcImpl.getFCD16FromBelow180(0));
2861    }
2862
2863    /*
2864     * Abstract class Normalizer2 has non-abstract methods which are overwritten by
2865     * its derived classes. To test these methods a derived class is defined here.
2866     */
2867    public class TestNormalizer2 extends Normalizer2 {
2868
2869        public TestNormalizer2() {}
2870        @Override
2871        public StringBuilder normalize(CharSequence src, StringBuilder dest) { return null; }
2872        @Override
2873        public Appendable normalize(CharSequence src, Appendable dest) { return null; }
2874        @Override
2875        public StringBuilder normalizeSecondAndAppend(
2876            StringBuilder first, CharSequence second) { return null; }
2877        @Override
2878        public StringBuilder append(StringBuilder first, CharSequence second) { return null; }
2879        @Override
2880        public String getDecomposition(int c) { return null; }
2881        @Override
2882        public boolean isNormalized(CharSequence s) { return false; }
2883        @Override
2884        public Normalizer.QuickCheckResult quickCheck(CharSequence s) { return null; }
2885        @Override
2886        public int spanQuickCheckYes(CharSequence s) { return 0; }
2887        @Override
2888        public boolean hasBoundaryBefore(int c) { return false; }
2889        @Override
2890        public boolean hasBoundaryAfter(int c) { return false; }
2891        @Override
2892        public boolean isInert(int c) { return false; }
2893    }
2894
2895    final TestNormalizer2 tnorm2 = new TestNormalizer2();
2896    @Test
2897    public void TestGetRawDecompositionBase() {
2898        int c = 'à';
2899        assertEquals("Unexpected value returned from Normalizer2.getRawDecomposition()",
2900                     null, tnorm2.getRawDecomposition(c));
2901    }
2902
2903    @Test
2904    public void TestComposePairBase() {
2905        int a = 'a';
2906        int b = '\u0300';
2907        assertEquals("Unexpected value returned from Normalizer2.composePair()",
2908                     -1, tnorm2.composePair(a, b));
2909    }
2910
2911    @Test
2912    public void TestGetCombiningClassBase() {
2913        int c = '\u00e0';
2914        assertEquals("Unexpected value returned from Normalizer2.getCombiningClass()",
2915                     0, tnorm2.getCombiningClass(c));
2916    }
2917}
2918