1/* GENERATED SOURCE. DO NOT MODIFY. */
2// © 2016 and later: Unicode, Inc. and others.
3// License & terms of use: http://www.unicode.org/copyright.html#License
4/*
5 *******************************************************************************
6 * Copyright (C) 1996-2016, International Business Machines Corporation and
7 * others. All Rights Reserved.
8 *******************************************************************************
9 */
10
11package android.icu.dev.test.normalizer;
12
13import java.text.StringCharacterIterator;
14import java.util.Random;
15
16import org.junit.Test;
17
18import android.icu.dev.test.TestFmwk;
19import android.icu.impl.Norm2AllModes;
20import android.icu.impl.Normalizer2Impl;
21import android.icu.impl.USerializedSet;
22import android.icu.impl.Utility;
23import android.icu.lang.UCharacter;
24import android.icu.lang.UCharacterCategory;
25import android.icu.lang.UProperty;
26import android.icu.text.FilteredNormalizer2;
27import android.icu.text.Normalizer;
28import android.icu.text.Normalizer2;
29import android.icu.text.UCharacterIterator;
30import android.icu.text.UTF16;
31import android.icu.text.UnicodeSet;
32import android.icu.text.UnicodeSetIterator;
33
34
35public class BasicTest extends TestFmwk {
    /**
     * Canonical-normalization test data.
     *
     * Each row is { input, expected decomposed form, expected composed form }.
     * Tests in this class pick a column by index when calling the helpers:
     * 0 for the unmodified input (Normalizer.NONE), 1 for the NFD result and
     * 2 for the NFC result.
     */
    String[][] canonTests = {
        // Input                Decomposed              Composed
        { "cat",                "cat",                  "cat"               },
        { "\u00e0ardvark",      "a\u0300ardvark",       "\u00e0ardvark",    },

        { "\u1e0a",             "D\u0307",              "\u1e0a"            }, // D-dot_above
        { "D\u0307",            "D\u0307",              "\u1e0a"            }, // D dot_above

        { "\u1e0c\u0307",       "D\u0323\u0307",        "\u1e0c\u0307"      }, // D-dot_below dot_above
        { "\u1e0a\u0323",       "D\u0323\u0307",        "\u1e0c\u0307"      }, // D-dot_above dot_below
        { "D\u0307\u0323",      "D\u0323\u0307",        "\u1e0c\u0307"      }, // D dot_below dot_above

        { "\u1e10\u0307\u0323", "D\u0327\u0323\u0307",  "\u1e10\u0323\u0307"}, // D dot_below cedilla dot_above
        { "D\u0307\u0328\u0323","D\u0328\u0323\u0307",  "\u1e0c\u0328\u0307"}, // D dot_above ogonek dot_below

        { "\u1E14",             "E\u0304\u0300",        "\u1E14"            }, // E-macron-grave
        { "\u0112\u0300",       "E\u0304\u0300",        "\u1E14"            }, // E-macron + grave
        { "\u00c8\u0304",       "E\u0300\u0304",        "\u00c8\u0304"      }, // E-grave + macron

        { "\u212b",             "A\u030a",              "\u00c5"            }, // angstrom_sign
        { "\u00c5",             "A\u030a",              "\u00c5"            }, // A-ring

        // Canonical forms leave the ffi ligature (U+FB03) untouched.
        { "\u00c4ffin",         "A\u0308ffin",          "\u00c4ffin"        },
        { "\u00c4\uFB03n",      "A\u0308\uFB03n",       "\u00c4\uFB03n"     },

        { "\u00fdffin",         "y\u0301ffin",          "\u00fdffin"        }, //updated with 3.0
        { "\u00fd\uFB03n",      "y\u0301\uFB03n",       "\u00fd\uFB03n"     }, //updated with 3.0

        // Canonical forms leave the Roman numeral four (U+2163) untouched.
        { "Henry IV",           "Henry IV",             "Henry IV"          },
        { "Henry \u2163",       "Henry \u2163",         "Henry \u2163"      },

        { "\u30AC",             "\u30AB\u3099",         "\u30AC"            }, // ga (Katakana)
        { "\u30AB\u3099",       "\u30AB\u3099",         "\u30AC"            }, // ka + ten
        { "\uFF76\uFF9E",       "\uFF76\uFF9E",         "\uFF76\uFF9E"      }, // hw_ka + hw_ten
        { "\u30AB\uFF9E",       "\u30AB\uFF9E",         "\u30AB\uFF9E"      }, // ka + hw_ten
        { "\uFF76\u3099",       "\uFF76\u3099",         "\uFF76\u3099"      }, // hw_ka + ten

        { "A\u0300\u0316", "A\u0316\u0300", "\u00C0\u0316" },
        // Supplementary code points spelled as literal backslash-U escapes (the
        // backslash is doubled, so the Java compiler does not expand them).
        // NOTE(review): presumably the test helpers unescape these at run time - confirm.
        {"\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e","\\U0001D157\\U0001D165\\U0001D157\\U0001D165\\U0001D157\\U0001D165", "\\U0001D157\\U0001D165\\U0001D157\\U0001D165\\U0001D157\\U0001D165"},
    };
76
    /**
     * Compatibility-normalization test data.
     *
     * Each row is { input, expected decomposed form, expected composed form }.
     * Tests in this class use column 1 as the expected NFKD result and
     * column 2 as the expected NFKC result.
     */
    String[][] compatTests = {
            // Input                Decomposed              Composed
        { "cat",                 "cat",                     "cat"           },
        { "\uFB4f",             "\u05D0\u05DC",         "\u05D0\u05DC",     }, // Alef-Lamed vs. Alef, Lamed

        { "\u00C4ffin",         "A\u0308ffin",          "\u00C4ffin"        },
        { "\u00C4\uFB03n",      "A\u0308ffin",          "\u00C4ffin"        }, // ffi ligature -> f + f + i

        { "\u00fdffin",         "y\u0301ffin",          "\u00fdffin"        },        //updated for 3.0
        { "\u00fd\uFB03n",      "y\u0301ffin",          "\u00fdffin"        }, // ffi ligature -> f + f + i

        { "Henry IV",           "Henry IV",             "Henry IV"          },
        { "Henry \u2163",       "Henry IV",             "Henry IV"          }, // Roman numeral four -> I + V

        { "\u30AC",             "\u30AB\u3099",         "\u30AC"            }, // ga (Katakana)
        { "\u30AB\u3099",       "\u30AB\u3099",         "\u30AC"            }, // ka + ten

        { "\uFF76\u3099",       "\u30AB\u3099",         "\u30AC"            }, // hw_ka + ten

        /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later*/
        { "\uFF76\uFF9E",       "\u30AB\u3099",         "\u30AC"            }, // hw_ka + hw_ten
        { "\u30AB\uFF9E",       "\u30AB\u3099",         "\u30AC"            }, // ka + hw_ten

    };
101
    // Hangul test data for canonical normalization.
    // With canonical decomposition, Hangul syllables should get decomposed
    // into Jamo, but Jamo characters should not be decomposed into
    // conjoining Jamo.
    // Each row is { input, expected decomposed form, expected composed form }.
    String[][] hangulCanon = {
        // Input                Decomposed              Composed
        { "\ud4db",             "\u1111\u1171\u11b6",   "\ud4db"        }, // precomposed syllable
        { "\u1111\u1171\u11b6", "\u1111\u1171\u11b6",   "\ud4db"        }, // already-decomposed Jamo sequence
    };
110
    // Hangul test data for compatibility normalization.
    // With compatibility decomposition turned on,
    // it should go all the way down to conjoining Jamo characters.
    // THIS IS NO LONGER TRUE IN UNICODE v2.1.8, SO THIS TEST IS OBSOLETE
    // The table is therefore left empty (its only row is commented out); the
    // tests still pass it to the helpers.
    // NOTE(review): presumably the helpers simply iterate over zero rows - confirm.
    String[][] hangulCompat = {
        // Input        Decomposed                          Composed
        // { "\ud4db",     "\u1111\u116e\u1175\u11af\u11c2",   "\ud478\u1175\u11af\u11c2"  },
    };
118
119    @Test
120    public void TestHangulCompose()
121                throws Exception{
122        // Make sure that the static composition methods work
123        logln("Canonical composition...");
124        staticTest(Normalizer.NFC, hangulCanon,  2);
125        logln("Compatibility composition...");
126        staticTest(Normalizer.NFKC, hangulCompat, 2);
127        // Now try iterative composition....
128        logln("Iterative composition...");
129        Normalizer norm = new Normalizer("", Normalizer.NFC,0);
130        iterateTest(norm, hangulCanon, 2);
131
132        norm.setMode(Normalizer.NFKD);
133        iterateTest(norm, hangulCompat, 2);
134
135        // And finally, make sure you can do it in reverse too
136        logln("Reverse iteration...");
137        norm.setMode(Normalizer.NFC);
138        backAndForth(norm, hangulCanon);
139     }
140
141    @Test
142    public void TestHangulDecomp() throws Exception{
143        // Make sure that the static decomposition methods work
144        logln("Canonical decomposition...");
145        staticTest(Normalizer.NFD, hangulCanon,  1);
146        logln("Compatibility decomposition...");
147        staticTest(Normalizer.NFKD, hangulCompat, 1);
148
149         // Now the iterative decomposition methods...
150        logln("Iterative decomposition...");
151        Normalizer norm = new Normalizer("", Normalizer.NFD,0);
152        iterateTest(norm, hangulCanon, 1);
153
154        norm.setMode(Normalizer.NFKD);
155        iterateTest(norm, hangulCompat, 1);
156
157        // And finally, make sure you can do it in reverse too
158        logln("Reverse iteration...");
159        norm.setMode(Normalizer.NFD);
160        backAndForth(norm, hangulCanon);
161    }
162    @Test
163    public void TestNone() throws Exception{
164        Normalizer norm = new Normalizer("", Normalizer.NONE,0);
165        iterateTest(norm, canonTests, 0);
166        staticTest(Normalizer.NONE, canonTests, 0);
167    }
168    @Test
169    public void TestDecomp() throws Exception{
170        Normalizer norm = new Normalizer("", Normalizer.NFD,0);
171        iterateTest(norm, canonTests, 1);
172        staticTest(Normalizer.NFD, canonTests, 1);
173        decomposeTest(Normalizer.NFD, canonTests, 1);
174    }
175
176    @Test
177    public void TestCompatDecomp() throws Exception{
178        Normalizer norm = new Normalizer("", Normalizer.NFKD,0);
179        iterateTest(norm, compatTests, 1);
180        staticTest(Normalizer.NFKD,compatTests, 1);
181        decomposeTest(Normalizer.NFKD,compatTests, 1);
182    }
183
184    @Test
185    public void TestCanonCompose() throws Exception{
186        Normalizer norm = new Normalizer("", Normalizer.NFC,0);
187        iterateTest(norm, canonTests, 2);
188        staticTest(Normalizer.NFC, canonTests, 2);
189        composeTest(Normalizer.NFC, canonTests, 2);
190    }
191
192    @Test
193    public void TestCompatCompose() throws Exception{
194        Normalizer norm = new Normalizer("", Normalizer.NFKC,0);
195        iterateTest(norm, compatTests, 2);
196        staticTest(Normalizer.NFKC,compatTests, 2);
197        composeTest(Normalizer.NFKC,compatTests, 2);
198    }
199
200    @Test
201    public void TestExplodingBase() throws Exception{
202        // \u017f - Latin small letter long s
203        // \u0307 - combining dot above
204        // \u1e61 - Latin small letter s with dot above
205        // \u1e9b - Latin small letter long s with dot above
206        String[][] canon = {
207            // Input                Decomposed              Composed
208            { "Tschu\u017f",        "Tschu\u017f",          "Tschu\u017f"    },
209            { "Tschu\u1e9b",        "Tschu\u017f\u0307",    "Tschu\u1e9b"    },
210        };
211        String[][] compat = {
212            // Input                Decomposed              Composed
213            { "\u017f",        "s",              "s"           },
214            { "\u1e9b",        "s\u0307",        "\u1e61"      },
215        };
216
217        staticTest(Normalizer.NFD, canon,  1);
218        staticTest(Normalizer.NFC, canon,  2);
219
220        staticTest(Normalizer.NFKD, compat, 1);
221        staticTest(Normalizer.NFKC, compat, 2);
222
223    }
224
225    /**
226     * The Tibetan vowel sign AA, 0f71, was messed up prior to
227     * Unicode version 2.1.9.
228     * Once 2.1.9 or 3.0 is released, uncomment this test.
229     */
230    @Test
231    public void TestTibetan() throws Exception{
232        String[][] decomp = {
233            { "\u0f77", "\u0f77", "\u0fb2\u0f71\u0f80" }
234        };
235        String[][] compose = {
236            { "\u0fb2\u0f71\u0f80", "\u0fb2\u0f71\u0f80", "\u0fb2\u0f71\u0f80" }
237        };
238
239        staticTest(Normalizer.NFD, decomp, 1);
240        staticTest(Normalizer.NFKD,decomp, 2);
241        staticTest(Normalizer.NFC, compose, 1);
242        staticTest(Normalizer.NFKC,compose, 2);
243    }
244
245    /**
246     * Make sure characters in the CompositionExclusion.txt list do not get
247     * composed to.
248     */
249    @Test
250    public void TestCompositionExclusion()
251                throws Exception{
252        // This list is generated from CompositionExclusion.txt.
253        // Update whenever the normalizer tables are updated.  Note
254        // that we test all characters listed, even those that can be
255        // derived from the Unicode DB and are therefore commented
256        // out.
257        String EXCLUDED =
258            "\u0340\u0341\u0343\u0344\u0374\u037E\u0387\u0958" +
259            "\u0959\u095A\u095B\u095C\u095D\u095E\u095F\u09DC" +
260            "\u09DD\u09DF\u0A33\u0A36\u0A59\u0A5A\u0A5B\u0A5E" +
261            "\u0B5C\u0B5D\u0F43\u0F4D\u0F52\u0F57\u0F5C\u0F69" +
262            "\u0F73\u0F75\u0F76\u0F78\u0F81\u0F93\u0F9D\u0FA2" +
263            "\u0FA7\u0FAC\u0FB9\u1F71\u1F73\u1F75\u1F77\u1F79" +
264            "\u1F7B\u1F7D\u1FBB\u1FBE\u1FC9\u1FCB\u1FD3\u1FDB" +
265            "\u1FE3\u1FEB\u1FEE\u1FEF\u1FF9\u1FFB\u1FFD\u2000" +
266            "\u2001\u2126\u212A\u212B\u2329\u232A\uF900\uFA10" +
267            "\uFA12\uFA15\uFA20\uFA22\uFA25\uFA26\uFA2A\uFB1F" +
268            "\uFB2A\uFB2B\uFB2C\uFB2D\uFB2E\uFB2F\uFB30\uFB31" +
269            "\uFB32\uFB33\uFB34\uFB35\uFB36\uFB38\uFB39\uFB3A" +
270            "\uFB3B\uFB3C\uFB3E\uFB40\uFB41\uFB43\uFB44\uFB46" +
271            "\uFB47\uFB48\uFB49\uFB4A\uFB4B\uFB4C\uFB4D\uFB4E";
272        for (int i=0; i<EXCLUDED.length(); ++i) {
273            String a = String.valueOf(EXCLUDED.charAt(i));
274            String b = Normalizer.normalize(a, Normalizer.NFKD);
275            String c = Normalizer.normalize(b, Normalizer.NFC);
276            if (c.equals(a)) {
277                errln("FAIL: " + hex(a) + " x DECOMP_COMPAT => " +
278                      hex(b) + " x COMPOSE => " +
279                      hex(c));
280            } else if (isVerbose()) {
281                logln("Ok: " + hex(a) + " x DECOMP_COMPAT => " +
282                      hex(b) + " x COMPOSE => " +
283                      hex(c));
284            }
285        }
286        // The following method works too, but it is somewhat
287        // incestuous.  It uses UInfo, which is the same database that
288        // NormalizerBuilder uses, so if something is wrong with
289        // UInfo, the following test won't show it.  All it will show
290        // is that NormalizerBuilder has been run with whatever the
291        // current UInfo is.
292        //
293        // We comment this out in favor of the test above, which
294        // provides independent verification (but also requires
295        // independent updating).
296//      logln("---");
297//      UInfo uinfo = new UInfo();
298//      for (int i=0; i<=0xFFFF; ++i) {
299//          if (!uinfo.isExcludedComposition((char)i) ||
300//              (!uinfo.hasCanonicalDecomposition((char)i) &&
301//               !uinfo.hasCompatibilityDecomposition((char)i))) continue;
302//          String a = String.valueOf((char)i);
303//          String b = Normalizer.normalize(a,Normalizer.DECOMP_COMPAT,0);
304//          String c = Normalizer.normalize(b,Normalizer.COMPOSE,0);
305//          if (c.equals(a)) {
306//              errln("FAIL: " + hex(a) + " x DECOMP_COMPAT => " +
307//                    hex(b) + " x COMPOSE => " +
308//                    hex(c));
309//          } else if (isVerbose()) {
310//              logln("Ok: " + hex(a) + " x DECOMP_COMPAT => " +
311//                    hex(b) + " x COMPOSE => " +
312//                    hex(c));
313//          }
314//      }
315    }
316
317    /**
318     * Test for a problem that showed up just before ICU 1.6 release
319     * having to do with combining characters with an index of zero.
320     * Such characters do not participate in any canonical
321     * decompositions.  However, having an index of zero means that
322     * they all share one typeMask[] entry, that is, they all have to
323     * map to the same canonical class, which is not the case, in
324     * reality.
325     */
326    @Test
327    public void TestZeroIndex()
328                throws Exception{
329        String[] DATA = {
330            // Expect col1 x COMPOSE_COMPAT => col2
331            // Expect col2 x DECOMP => col3
332            "A\u0316\u0300", "\u00C0\u0316", "A\u0316\u0300",
333            "A\u0300\u0316", "\u00C0\u0316", "A\u0316\u0300",
334            "A\u0327\u0300", "\u00C0\u0327", "A\u0327\u0300",
335            "c\u0321\u0327", "c\u0321\u0327", "c\u0321\u0327",
336            "c\u0327\u0321", "\u00E7\u0321", "c\u0327\u0321",
337        };
338
339        for (int i=0; i<DATA.length; i+=3) {
340            String a = DATA[i];
341            String b = Normalizer.normalize(a, Normalizer.NFKC);
342            String exp = DATA[i+1];
343            if (b.equals(exp)) {
344                logln("Ok: " + hex(a) + " x COMPOSE_COMPAT => " + hex(b));
345            } else {
346                errln("FAIL: " + hex(a) + " x COMPOSE_COMPAT => " + hex(b) +
347                      ", expect " + hex(exp));
348            }
349            a = Normalizer.normalize(b, Normalizer.NFD);
350            exp = DATA[i+2];
351            if (a.equals(exp)) {
352                logln("Ok: " + hex(b) + " x DECOMP => " + hex(a));
353            } else {
354                errln("FAIL: " + hex(b) + " x DECOMP => " + hex(a) +
355                      ", expect " + hex(exp));
356            }
357        }
358    }
359
360    /**
361     * Test for a problem found by Verisign.  Problem is that
362     * characters at the start of a string are not put in canonical
363     * order correctly by compose() if there is no starter.
364     */
365    @Test
366    public void TestVerisign()
367                throws Exception{
368        String[] inputs = {
369            "\u05b8\u05b9\u05b1\u0591\u05c3\u05b0\u05ac\u059f",
370            "\u0592\u05b7\u05bc\u05a5\u05b0\u05c0\u05c4\u05ad"
371        };
372        String[] outputs = {
373            "\u05b1\u05b8\u05b9\u0591\u05c3\u05b0\u05ac\u059f",
374            "\u05b0\u05b7\u05bc\u05a5\u0592\u05c0\u05ad\u05c4"
375        };
376
377        for (int i = 0; i < inputs.length; ++i) {
378            String input = inputs[i];
379            String output = outputs[i];
380            String result = Normalizer.decompose(input, false);
381            if (!result.equals(output)) {
382                errln("FAIL input: " + hex(input));
383                errln(" decompose: " + hex(result));
384                errln("  expected: " + hex(output));
385            }
386            result = Normalizer.compose(input, false);
387            if (!result.equals(output)) {
388                errln("FAIL input: " + hex(input));
389                errln("   compose: " + hex(result));
390                errln("  expected: " + hex(output));
391            }
392        }
393
394    }
395    @Test
396    public void  TestQuickCheckResultNO()
397                 throws Exception{
398        final char CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
399                                0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
400        final char CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
401                                0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
402        final char CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
403                                0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
404        final char CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
405                                0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
406
407
408        final int SIZE = 10;
409
410        int count = 0;
411        for (; count < SIZE; count ++)
412        {
413            if (Normalizer.quickCheck(String.valueOf(CPNFD[count]),
414                    Normalizer.NFD,0) != Normalizer.NO)
415            {
416                errln("ERROR in NFD quick check at U+" +
417                       Integer.toHexString(CPNFD[count]));
418                return;
419            }
420            if (Normalizer.quickCheck(String.valueOf(CPNFC[count]),
421                        Normalizer.NFC,0) !=Normalizer.NO)
422            {
423                errln("ERROR in NFC quick check at U+"+
424                       Integer.toHexString(CPNFC[count]));
425                return;
426            }
427            if (Normalizer.quickCheck(String.valueOf(CPNFKD[count]),
428                                Normalizer.NFKD,0) != Normalizer.NO)
429            {
430                errln("ERROR in NFKD quick check at U+"+
431                       Integer.toHexString(CPNFKD[count]));
432                return;
433            }
434            if (Normalizer.quickCheck(String.valueOf(CPNFKC[count]),
435                                         Normalizer.NFKC,0) !=Normalizer.NO)
436            {
437                errln("ERROR in NFKC quick check at U+"+
438                       Integer.toHexString(CPNFKC[count]));
439                return;
440            }
441            // for improving coverage
442            if (Normalizer.quickCheck(String.valueOf(CPNFKC[count]),
443                                         Normalizer.NFKC) !=Normalizer.NO)
444            {
445                errln("ERROR in NFKC quick check at U+"+
446                       Integer.toHexString(CPNFKC[count]));
447                return;
448            }
449        }
450    }
451
452
453    @Test
454    public void TestQuickCheckResultYES()
455                throws Exception{
456        final char CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
457                                0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
458        final char CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
459                                0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
460        final char CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
461                                0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
462        final char CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
463                                0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
464
465        final int SIZE = 10;
466        int count = 0;
467
468        char cp = 0;
469        while (cp < 0xA0)
470        {
471            if (Normalizer.quickCheck(String.valueOf(cp), Normalizer.NFD,0)
472                                            != Normalizer.YES)
473            {
474                errln("ERROR in NFD quick check at U+"+
475                                                      Integer.toHexString(cp));
476                return;
477            }
478            if (Normalizer.quickCheck(String.valueOf(cp), Normalizer.NFC,0)
479                                             != Normalizer.YES)
480            {
481                errln("ERROR in NFC quick check at U+"+
482                                                      Integer.toHexString(cp));
483                return;
484            }
485            if (Normalizer.quickCheck(String.valueOf(cp), Normalizer.NFKD,0)
486                                             != Normalizer.YES)
487            {
488                errln("ERROR in NFKD quick check at U+" +
489                                                      Integer.toHexString(cp));
490                return;
491            }
492            if (Normalizer.quickCheck(String.valueOf(cp), Normalizer.NFKC,0)
493                                             != Normalizer.YES)
494            {
495                errln("ERROR in NFKC quick check at U+"+
496                                                       Integer.toHexString(cp));
497                return;
498            }
499            // improve the coverage
500            if (Normalizer.quickCheck(String.valueOf(cp), Normalizer.NFKC)
501                                             != Normalizer.YES)
502            {
503                errln("ERROR in NFKC quick check at U+"+
504                                                       Integer.toHexString(cp));
505                return;
506            }
507            cp++;
508        }
509
510        for (; count < SIZE; count ++)
511        {
512            if (Normalizer.quickCheck(String.valueOf(CPNFD[count]),
513                                         Normalizer.NFD,0)!=Normalizer.YES)
514            {
515                errln("ERROR in NFD quick check at U+"+
516                                             Integer.toHexString(CPNFD[count]));
517                return;
518            }
519            if (Normalizer.quickCheck(String.valueOf(CPNFC[count]),
520                                         Normalizer.NFC,0)!=Normalizer.YES)
521            {
522                errln("ERROR in NFC quick check at U+"+
523                                             Integer.toHexString(CPNFC[count]));
524                return;
525            }
526            if (Normalizer.quickCheck(String.valueOf(CPNFKD[count]),
527                                         Normalizer.NFKD,0)!=Normalizer.YES)
528            {
529                errln("ERROR in NFKD quick check at U+"+
530                                    Integer.toHexString(CPNFKD[count]));
531                return;
532            }
533            if (Normalizer.quickCheck(String.valueOf(CPNFKC[count]),
534                                         Normalizer.NFKC,0)!=Normalizer.YES)
535            {
536                errln("ERROR in NFKC quick check at U+"+
537                        Integer.toHexString(CPNFKC[count]));
538                return;
539            }
540            // improve the coverage
541            if (Normalizer.quickCheck(String.valueOf(CPNFKC[count]),
542                                         Normalizer.NFKC)!=Normalizer.YES)
543            {
544                errln("ERROR in NFKC quick check at U+"+
545                        Integer.toHexString(CPNFKC[count]));
546                return;
547            }
548        }
549    }
550    @Test
551    public void TestBengali() throws Exception{
552        String input = "\u09bc\u09be\u09cd\u09be";
553        String output=Normalizer.normalize(input,Normalizer.NFC);
554        if(!input.equals(output)){
555             errln("ERROR in NFC of string");
556        }
557    }
558    @Test
559    public void TestQuickCheckResultMAYBE()
560                throws Exception{
561
562        final char[] CPNFC = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
563                                0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
564        final char[] CPNFKC = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
565                                0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
566
567
568        final int SIZE = 10;
569
570        int count = 0;
571
572        /* NFD and NFKD does not have any MAYBE codepoints */
573        for (; count < SIZE; count ++)
574        {
575            if (Normalizer.quickCheck(String.valueOf(CPNFC[count]),
576                                        Normalizer.NFC,0)!=Normalizer.MAYBE)
577            {
578                errln("ERROR in NFC quick check at U+"+
579                                            Integer.toHexString(CPNFC[count]));
580                return;
581            }
582            if (Normalizer.quickCheck(String.valueOf(CPNFKC[count]),
583                                       Normalizer.NFKC,0)!=Normalizer.MAYBE)
584            {
585                errln("ERROR in NFKC quick check at U+"+
586                                            Integer.toHexString(CPNFKC[count]));
587                return;
588            }
589            if (Normalizer.quickCheck(new char[]{CPNFC[count]},
590                                        Normalizer.NFC,0)!=Normalizer.MAYBE)
591            {
592                errln("ERROR in NFC quick check at U+"+
593                                            Integer.toHexString(CPNFC[count]));
594                return;
595            }
596            if (Normalizer.quickCheck(new char[]{CPNFKC[count]},
597                                       Normalizer.NFKC,0)!=Normalizer.MAYBE)
598            {
599                errln("ERROR in NFKC quick check at U+"+
600                                            Integer.toHexString(CPNFKC[count]));
601                return;
602            }
603            if (Normalizer.quickCheck(new char[]{CPNFKC[count]},
604                                       Normalizer.NONE,0)!=Normalizer.YES)
605            {
606                errln("ERROR in NONE quick check at U+"+
607                                            Integer.toHexString(CPNFKC[count]));
608                return;
609            }
610        }
611    }
612
613    @Test
614    public void TestQuickCheckStringResult()
615                throws Exception{
616        int count;
617        String d;
618        String c;
619
620        for (count = 0; count < canonTests.length; count ++)
621        {
622            d = canonTests[count][1];
623            c = canonTests[count][2];
624            if (Normalizer.quickCheck(d,Normalizer.NFD,0)
625                                            != Normalizer.YES)
626            {
627                errln("ERROR in NFD quick check for string at count " + count);
628                return;
629            }
630
631            if (Normalizer.quickCheck(c, Normalizer.NFC,0)
632                                            == Normalizer.NO)
633            {
634                errln("ERROR in NFC quick check for string at count " + count);
635                return;
636            }
637        }
638
639        for (count = 0; count < compatTests.length; count ++)
640        {
641            d = compatTests[count][1];
642            c = compatTests[count][2];
643            if (Normalizer.quickCheck(d, Normalizer.NFKD,0)
644                                            != Normalizer.YES)
645            {
646                errln("ERROR in NFKD quick check for string at count " + count);
647                return;
648            }
649
650            if (Normalizer.quickCheck(c,  Normalizer.NFKC,0)
651                                            != Normalizer.YES)
652            {
653                errln("ERROR in NFKC quick check for string at count " + count);
654                return;
655            }
656        }
657    }
658
659    static final int qcToInt(Normalizer.QuickCheckResult qc) {
660        if(qc==Normalizer.NO) {
661            return 0;
662        } else if(qc==Normalizer.YES) {
663            return 1;
664        } else /* Normalizer.MAYBE */ {
665            return 2;
666        }
667    }
668
669    @Test
670    public void TestQuickCheckPerCP() {
671        int c, lead, trail;
672        String s, nfd;
673        int lccc1, lccc2, tccc1, tccc2;
674        int qc1, qc2;
675
676        if(
677            UCharacter.getIntPropertyMaxValue(UProperty.NFD_QUICK_CHECK)!=1 || // YES
678            UCharacter.getIntPropertyMaxValue(UProperty.NFKD_QUICK_CHECK)!=1 ||
679            UCharacter.getIntPropertyMaxValue(UProperty.NFC_QUICK_CHECK)!=2 || // MAYBE
680            UCharacter.getIntPropertyMaxValue(UProperty.NFKC_QUICK_CHECK)!=2 ||
681            UCharacter.getIntPropertyMaxValue(UProperty.LEAD_CANONICAL_COMBINING_CLASS)!=UCharacter.getIntPropertyMaxValue(UProperty.CANONICAL_COMBINING_CLASS) ||
682            UCharacter.getIntPropertyMaxValue(UProperty.TRAIL_CANONICAL_COMBINING_CLASS)!=UCharacter.getIntPropertyMaxValue(UProperty.CANONICAL_COMBINING_CLASS)
683        ) {
684            errln("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS");
685        }
686
687        /*
688         * compare the quick check property values for some code points
689         * to the quick check results for checking same-code point strings
690         */
691        c=0;
692        while(c<0x110000) {
693            s=UTF16.valueOf(c);
694
695            qc1=UCharacter.getIntPropertyValue(c, UProperty.NFC_QUICK_CHECK);
696            qc2=qcToInt(Normalizer.quickCheck(s, Normalizer.NFC));
697            if(qc1!=qc2) {
698                errln("getIntPropertyValue(NFC)="+qc1+" != "+qc2+"=quickCheck(NFC) for U+"+Integer.toHexString(c));
699            }
700
701            qc1=UCharacter.getIntPropertyValue(c, UProperty.NFD_QUICK_CHECK);
702            qc2=qcToInt(Normalizer.quickCheck(s, Normalizer.NFD));
703            if(qc1!=qc2) {
704                errln("getIntPropertyValue(NFD)="+qc1+" != "+qc2+"=quickCheck(NFD) for U+"+Integer.toHexString(c));
705            }
706
707            qc1=UCharacter.getIntPropertyValue(c, UProperty.NFKC_QUICK_CHECK);
708            qc2=qcToInt(Normalizer.quickCheck(s, Normalizer.NFKC));
709            if(qc1!=qc2) {
710                errln("getIntPropertyValue(NFKC)="+qc1+" != "+qc2+"=quickCheck(NFKC) for U+"+Integer.toHexString(c));
711            }
712
713            qc1=UCharacter.getIntPropertyValue(c, UProperty.NFKD_QUICK_CHECK);
714            qc2=qcToInt(Normalizer.quickCheck(s, Normalizer.NFKD));
715            if(qc1!=qc2) {
716                errln("getIntPropertyValue(NFKD)="+qc1+" != "+qc2+"=quickCheck(NFKD) for U+"+Integer.toHexString(c));
717            }
718
719            nfd=Normalizer.normalize(s, Normalizer.NFD);
720            lead=UTF16.charAt(nfd, 0);
721            trail=UTF16.charAt(nfd, nfd.length()-1);
722
723            lccc1=UCharacter.getIntPropertyValue(c, UProperty.LEAD_CANONICAL_COMBINING_CLASS);
724            lccc2=UCharacter.getCombiningClass(lead);
725            tccc1=UCharacter.getIntPropertyValue(c, UProperty.TRAIL_CANONICAL_COMBINING_CLASS);
726            tccc2=UCharacter.getCombiningClass(trail);
727
728            if(lccc1!=lccc2) {
729                errln("getIntPropertyValue(lccc)="+lccc1+" != "+lccc2+"=getCombiningClass(lead) for U+"+Integer.toHexString(c));
730            }
731            if(tccc1!=tccc2) {
732                errln("getIntPropertyValue(tccc)="+tccc1+" != "+tccc2+"=getCombiningClass(trail) for U+"+Integer.toHexString(c));
733            }
734
735            /* skip some code points */
736            c=(20*c)/19+1;
737        }
738    }
739
    //------------------------------------------------------------------------
    // Internal utilities
    //
746
747/*    private void backAndForth(Normalizer iter, String input)
748    {
749        iter.setText(input);
750
751        // Run through the iterator forwards and stick it into a StringBuffer
752        StringBuffer forward =  new StringBuffer();
753        for (int ch = iter.first(); ch != Normalizer.DONE; ch = iter.next()) {
754            forward.append(ch);
755        }
756
757        // Now do it backwards
758        StringBuffer reverse = new StringBuffer();
759        for (int ch = iter.last(); ch != Normalizer.DONE; ch = iter.previous()) {
760            reverse.insert(0, ch);
761        }
762
763        if (!forward.toString().equals(reverse.toString())) {
764            errln("FAIL: Forward/reverse mismatch for input " + hex(input)
765                  + ", forward: " + hex(forward) + ", backward: "+hex(reverse));
766        } else if (isVerbose()) {
767            logln("Ok: Forward/reverse for input " + hex(input)
768                  + ", forward: " + hex(forward) + ", backward: "+hex(reverse));
769        }
770    }*/
771
772    private void backAndForth(Normalizer iter, String[][] tests)
773    {
774        for (int i = 0; i < tests.length; i++)
775        {
776            iter.setText(tests[i][0]);
777
778            // Run through the iterator forwards and stick it into a
779            // StringBuffer
780            StringBuffer forward =  new StringBuffer();
781            for (int ch = iter.first(); ch != Normalizer.DONE; ch = iter.next()) {
782                forward.append(ch);
783            }
784
785            // Now do it backwards
786            StringBuffer reverse = new StringBuffer();
787            for (int ch = iter.last(); ch != Normalizer.DONE; ch = iter.previous()) {
788                reverse.insert(0, ch);
789            }
790
791            if (!forward.toString().equals(reverse.toString())) {
792                errln("FAIL: Forward/reverse mismatch for input "
793                    + hex(tests[i][0]) + ", forward: " + hex(forward)
794                    + ", backward: " + hex(reverse));
795            } else if (isVerbose()) {
796                logln("Ok: Forward/reverse for input " + hex(tests[i][0])
797                      + ", forward: " + hex(forward) + ", backward: "
798                      + hex(reverse));
799            }
800        }
801    }
802
803    private void staticTest (Normalizer.Mode mode,
804                             String[][] tests, int outCol) throws Exception{
805        for (int i = 0; i < tests.length; i++)
806        {
807            String input = Utility.unescape(tests[i][0]);
808            String expect = Utility.unescape(tests[i][outCol]);
809
810            logln("Normalizing '" + input + "' (" + hex(input) + ")" );
811
812            String output = Normalizer.normalize(input, mode);
813
814            if (!output.equals(expect)) {
815                errln("FAIL: case " + i
816                    + " expected '" + expect + "' (" + hex(expect) + ")"
817                    + " but got '" + output + "' (" + hex(output) + ")" );
818            }
819        }
820        char[] output = new char[1];
821        for (int i = 0; i < tests.length; i++)
822        {
823            char[] input = Utility.unescape(tests[i][0]).toCharArray();
824            String expect =Utility.unescape( tests[i][outCol]);
825
826            logln("Normalizing '" + new String(input) + "' (" +
827                        hex(new String(input)) + ")" );
828            int reqLength=0;
829            while(true){
830                try{
831                    reqLength=Normalizer.normalize(input,output, mode,0);
832                    if(reqLength<=output.length    ){
833                        break;
834                    }
835                }catch(IndexOutOfBoundsException e){
836                    output= new char[Integer.parseInt(e.getMessage())];
837                    continue;
838                }
839            }
840            if (!expect.equals(new String(output,0,reqLength))) {
841                errln("FAIL: case " + i
842                    + " expected '" + expect + "' (" + hex(expect) + ")"
843                    + " but got '" + new String(output)
844                    + "' ("  + hex(new String(output)) + ")" );
845            }
846        }
847    }
848    private void decomposeTest(Normalizer.Mode mode,
849                             String[][] tests, int outCol) throws Exception{
850        for (int i = 0; i < tests.length; i++)
851        {
852            String input = Utility.unescape(tests[i][0]);
853            String expect = Utility.unescape(tests[i][outCol]);
854
855            logln("Normalizing '" + input + "' (" + hex(input) + ")" );
856
857            String output = Normalizer.decompose(input, mode==Normalizer.NFKD);
858
859            if (!output.equals(expect)) {
860                errln("FAIL: case " + i
861                    + " expected '" + expect + "' (" + hex(expect) + ")"
862                    + " but got '" + output + "' (" + hex(output) + ")" );
863            }
864        }
865        char[] output = new char[1];
866        for (int i = 0; i < tests.length; i++)
867        {
868            char[] input = Utility.unescape(tests[i][0]).toCharArray();
869            String expect = Utility.unescape(tests[i][outCol]);
870
871            logln("Normalizing '" + new String(input) + "' (" +
872                        hex(new String(input)) + ")" );
873            int reqLength=0;
874            while(true){
875                try{
876                    reqLength=Normalizer.decompose(input,output, mode==Normalizer.NFKD,0);
877                    if(reqLength<=output.length ){
878                        break;
879                    }
880                }catch(IndexOutOfBoundsException e){
881                    output= new char[Integer.parseInt(e.getMessage())];
882                    continue;
883                }
884            }
885            if (!expect.equals(new String(output,0,reqLength))) {
886                errln("FAIL: case " + i
887                    + " expected '" + expect + "' (" + hex(expect) + ")"
888                    + " but got '" + new String(output)
889                    + "' ("  + hex(new String(output)) + ")" );
890            }
891        }
892        output = new char[1];
893        for (int i = 0; i < tests.length; i++)
894        {
895           char[] input = Utility.unescape(tests[i][0]).toCharArray();
896           String expect = Utility.unescape(tests[i][outCol]);
897
898           logln("Normalizing '" + new String(input) + "' (" +
899                       hex(new String(input)) + ")" );
900           int reqLength=0;
901           while(true){
902               try{
903                   reqLength=Normalizer.decompose(input,0,input.length,output,0,output.length, mode==Normalizer.NFKD,0);
904                   if(reqLength<=output.length ){
905                       break;
906                   }
907               }catch(IndexOutOfBoundsException e){
908                   output= new char[Integer.parseInt(e.getMessage())];
909                   continue;
910               }
911           }
912           if (!expect.equals(new String(output,0,reqLength))) {
913               errln("FAIL: case " + i
914                   + " expected '" + expect + "' (" + hex(expect) + ")"
915                   + " but got '" + new String(output)
916                   + "' ("  + hex(new String(output)) + ")" );
917           }
918           char[] output2 = new char[reqLength * 2];
919           System.arraycopy(output, 0, output2, 0, reqLength);
920           int retLength = Normalizer.decompose(input,0,input.length, output2, reqLength, output2.length, mode==Normalizer.NFKC,0);
921           if(retLength != reqLength){
922               logln("FAIL: Normalizer.compose did not return the expected length. Expected: " +reqLength + " Got: " + retLength);
923           }
924        }
925    }
926
927    private void composeTest(Normalizer.Mode mode,
928                             String[][] tests, int outCol) throws Exception{
929        for (int i = 0; i < tests.length; i++)
930        {
931            String input = Utility.unescape(tests[i][0]);
932            String expect = Utility.unescape(tests[i][outCol]);
933
934            logln("Normalizing '" + input + "' (" + hex(input) + ")" );
935
936            String output = Normalizer.compose(input, mode==Normalizer.NFKC);
937
938            if (!output.equals(expect)) {
939                errln("FAIL: case " + i
940                    + " expected '" + expect + "' (" + hex(expect) + ")"
941                    + " but got '" + output + "' (" + hex(output) + ")" );
942            }
943        }
944        char[] output = new char[1];
945        for (int i = 0; i < tests.length; i++)
946        {
947            char[] input = Utility.unescape(tests[i][0]).toCharArray();
948            String expect = Utility.unescape(tests[i][outCol]);
949
950            logln("Normalizing '" + new String(input) + "' (" +
951                        hex(new String(input)) + ")" );
952            int reqLength=0;
953            while(true){
954                try{
955                    reqLength=Normalizer.compose(input,output, mode==Normalizer.NFKC,0);
956                    if(reqLength<=output.length ){
957                        break;
958                    }
959                }catch(IndexOutOfBoundsException e){
960                    output= new char[Integer.parseInt(e.getMessage())];
961                    continue;
962                }
963            }
964            if (!expect.equals(new String(output,0,reqLength))) {
965                errln("FAIL: case " + i
966                    + " expected '" + expect + "' (" + hex(expect) + ")"
967                    + " but got '" + new String(output)
968                    + "' ("  + hex(new String(output)) + ")" );
969            }
970        }
971        output = new char[1];
972        for (int i = 0; i < tests.length; i++)
973        {
974            char[] input = Utility.unescape(tests[i][0]).toCharArray();
975            String expect = Utility.unescape(tests[i][outCol]);
976
977            logln("Normalizing '" + new String(input) + "' (" +
978                        hex(new String(input)) + ")" );
979            int reqLength=0;
980            while(true){
981                try{
982                    reqLength=Normalizer.compose(input,0,input.length, output, 0, output.length, mode==Normalizer.NFKC,0);
983                    if(reqLength<=output.length ){
984                        break;
985                    }
986                }catch(IndexOutOfBoundsException e){
987                    output= new char[Integer.parseInt(e.getMessage())];
988                    continue;
989                }
990            }
991            if (!expect.equals(new String(output,0,reqLength))) {
992                errln("FAIL: case " + i
993                    + " expected '" + expect + "' (" + hex(expect) + ")"
994                    + " but got '" + new String(output)
995                    + "' ("  + hex(new String(output)) + ")" );
996            }
997
998            char[] output2 = new char[reqLength * 2];
999            System.arraycopy(output, 0, output2, 0, reqLength);
1000            int retLength = Normalizer.compose(input,0,input.length, output2, reqLength, output2.length, mode==Normalizer.NFKC,0);
1001            if(retLength != reqLength){
1002                logln("FAIL: Normalizer.compose did not return the expected length. Expected: " +reqLength + " Got: " + retLength);
1003            }
1004        }
1005    }
1006    private void iterateTest(Normalizer iter, String[][] tests, int outCol){
1007        for (int i = 0; i < tests.length; i++)
1008        {
1009            String input = Utility.unescape(tests[i][0]);
1010            String expect = Utility.unescape(tests[i][outCol]);
1011
1012            logln("Normalizing '" + input + "' (" + hex(input) + ")" );
1013
1014            iter.setText(input);
1015            assertEqual(expect, iter, "case " + i + " ");
1016        }
1017    }
1018
1019    private void assertEqual(String expected, Normalizer iter, String msg)
1020    {
1021        int index = 0;
1022        int ch;
1023        UCharacterIterator cIter =  UCharacterIterator.getInstance(expected);
1024
1025        while ((ch=iter.next())!= Normalizer.DONE){
1026            if (index >= expected.length()) {
1027                errln("FAIL: " + msg + "Unexpected character '" + (char)ch
1028                        + "' (" + hex(ch) + ")"
1029                        + " at index " + index);
1030                break;
1031            }
1032            int want = UTF16.charAt(expected,index);
1033            if (ch != want) {
1034                errln("FAIL: " + msg + "got '" + (char)ch
1035                        + "' (" + hex(ch) + ")"
1036                        + " but expected '" + want + "' (" + hex(want)+ ")"
1037                        + " at index " + index);
1038            }
1039            index+=  UTF16.getCharCount(ch);
1040        }
1041        if (index < expected.length()) {
1042            errln("FAIL: " + msg + "Only got " + index + " chars, expected "
1043            + expected.length());
1044        }
1045
1046        cIter.setToLimit();
1047        while((ch=iter.previous())!=Normalizer.DONE){
1048            int want = cIter.previousCodePoint();
1049            if (ch != want ) {
1050                errln("FAIL: " + msg + "got '" + (char)ch
1051                        + "' (" + hex(ch) + ")"
1052                        + " but expected '" + want + "' (" + hex(want) + ")"
1053                        + " at index " + index);
1054            }
1055        }
1056    }
1057    //--------------------------------------------------------------------------
1058
1059    // NOTE: These tests are used for quick debugging so are not ported
1060    // to ICU4C tsnorm.cpp in intltest
1061    //
1062
1063    @Test
1064    public void TestDebugStatic(){
1065        String in = Utility.unescape("\\U0001D157\\U0001D165");
1066        if(!Normalizer.isNormalized(in,Normalizer.NFC,0)){
1067            errln("isNormalized failed");
1068        }
1069
1070        String input  =  "\uAD8B\uAD8B\uAD8B\uAD8B"+
1071            "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
1072            "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
1073            "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
1074            "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
1075            "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
1076            "aaaaaaaaaaaaaaaaaazzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"+
1077            "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"+
1078            "ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"+
1079            "ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd"+
1080            "\uAD8B\uAD8B\uAD8B\uAD8B"+
1081            "d\u031B\u0307\u0323";
1082        String expect = "\u1100\u116F\u11AA\u1100\u116F\u11AA\u1100\u116F"+
1083                        "\u11AA\u1100\u116F\u11AA\uD834\uDD57\uD834\uDD65"+
1084                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1085                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1086                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1087                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1088                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1089                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1090                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1091                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1092                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1093                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1094                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1095                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1096                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1097                        "\uD834\uDD57\uD834\uDD65\uD834\uDD57\uD834\uDD65"+
1098                        "\uD834\uDD57\uD834\uDD65aaaaaaaaaaaaaaaaaazzzzzz"+
1099                        "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"+
1100                        "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"+
1101                        "bbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccc"+
1102                        "cccccccccccccccccccccccccccccccccccccccccccccccc"+
1103                        "ddddddddddddddddddddddddddddddddddddddddddddddddddddd"+
1104                        "dddddddddddddddddddddddd"+
1105                        "\u1100\u116F\u11AA\u1100\u116F\u11AA\u1100\u116F"+
1106                        "\u11AA\u1100\u116F\u11AA\u0064\u031B\u0323\u0307";
1107            String output = Normalizer.normalize(Utility.unescape(input),
1108                            Normalizer.NFD);
1109            if(!expect.equals(output)){
1110                errln("FAIL expected: "+hex(expect) + " got: "+hex(output));
1111            }
1112
1113
1114
1115    }
1116    @Test
1117    public void TestDebugIter(){
1118        String src = Utility.unescape("\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e");
1119        String expected = Utility.unescape("\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e");
1120        Normalizer iter = new Normalizer(new StringCharacterIterator(Utility.unescape(src)),
1121                                                Normalizer.NONE,0);
1122        int index = 0;
1123        int ch;
1124        UCharacterIterator cIter =  UCharacterIterator.getInstance(expected);
1125
1126        while ((ch=iter.next())!= Normalizer.DONE){
1127            if (index >= expected.length()) {
1128                errln("FAIL: " +  "Unexpected character '" + (char)ch
1129                        + "' (" + hex(ch) + ")"
1130                        + " at index " + index);
1131                break;
1132            }
1133            int want = UTF16.charAt(expected,index);
1134            if (ch != want) {
1135                errln("FAIL: " +  "got '" + (char)ch
1136                        + "' (" + hex(ch) + ")"
1137                        + " but expected '" + want + "' (" + hex(want)+ ")"
1138                        + " at index " + index);
1139            }
1140            index+=  UTF16.getCharCount(ch);
1141        }
1142        if (index < expected.length()) {
1143            errln("FAIL: " +  "Only got " + index + " chars, expected "
1144            + expected.length());
1145        }
1146
1147        cIter.setToLimit();
1148        while((ch=iter.previous())!=Normalizer.DONE){
1149            int want = cIter.previousCodePoint();
1150            if (ch != want ) {
1151                errln("FAIL: " + "got '" + (char)ch
1152                        + "' (" + hex(ch) + ")"
1153                        + " but expected '" + want + "' (" + hex(want) + ")"
1154                        + " at index " + index);
1155            }
1156        }
1157    }
1158    @Test
1159    public void TestDebugIterOld(){
1160        String input = "\\U0001D15E";
1161        String expected = "\uD834\uDD57\uD834\uDD65";
1162        String expectedReverse = "\uD834\uDD65\uD834\uDD57";
1163        int index = 0;
1164        int ch;
1165        Normalizer iter = new Normalizer(new StringCharacterIterator(Utility.unescape(input)),
1166                                                Normalizer.NFKC,0);
1167        StringBuffer got = new StringBuffer();
1168        for (ch = iter.first();ch!=Normalizer.DONE;ch=iter.next())
1169        {
1170            if (index >= expected.length()) {
1171                errln("FAIL: " +  "Unexpected character '" + (char)ch +
1172                       "' (" + hex(ch) + ")" + " at index " + index);
1173                break;
1174            }
1175            got.append(UCharacter.toString(ch));
1176            index++;
1177        }
1178        if (!expected.equals(got.toString())) {
1179                errln("FAIL: " +  "got '" +got+ "' (" + hex(got) + ")"
1180                        + " but expected '" + expected + "' ("
1181                        + hex(expected) + ")");
1182        }
1183        if (got.length() < expected.length()) {
1184            errln("FAIL: " +  "Only got " + index + " chars, expected "
1185                           + expected.length());
1186        }
1187
1188        logln("Reverse Iteration\n");
1189        iter.setIndexOnly(iter.endIndex());
1190        got.setLength(0);
1191        for(ch=iter.previous();ch!=Normalizer.DONE;ch=iter.previous()){
1192            if (index >= expected.length()) {
1193                errln("FAIL: " +  "Unexpected character '" + (char)ch
1194                               + "' (" + hex(ch) + ")" + " at index " + index);
1195                break;
1196            }
1197            got.append(UCharacter.toString(ch));
1198        }
1199        if (!expectedReverse.equals(got.toString())) {
1200                errln("FAIL: " +  "got '" +got+ "' (" + hex(got) + ")"
1201                               + " but expected '" + expected
1202                               + "' (" + hex(expected) + ")");
1203        }
1204        if (got.length() < expected.length()) {
1205            errln("FAIL: " +  "Only got " + index + " chars, expected "
1206                      + expected.length());
1207        }
1208
1209    }
1210    //--------------------------------------------------------------------------
1211    // helper class for TestPreviousNext()
1212    // simple UTF-32 character iterator
1213    class UCharIterator {
1214
1215       public UCharIterator(int[] src, int len, int index){
1216
1217            s=src;
1218            length=len;
1219            i=index;
1220       }
1221
1222        public int current() {
1223            if(i<length) {
1224                return s[i];
1225            } else {
1226                return -1;
1227            }
1228        }
1229
1230        public int next() {
1231            if(i<length) {
1232                return s[i++];
1233            } else {
1234                return -1;
1235            }
1236        }
1237
1238        public int previous() {
1239            if(i>0) {
1240                return s[--i];
1241            } else {
1242                return -1;
1243            }
1244        }
1245
1246        public int getIndex() {
1247            return i;
1248        }
1249
1250        private int[] s;
1251        private int length, i;
1252    }
1253    @Test
1254    public void TestPreviousNext() {
1255        // src and expect strings
1256        char src[]={
1257            UTF16.getLeadSurrogate(0x2f999), UTF16.getTrailSurrogate(0x2f999),
1258            UTF16.getLeadSurrogate(0x1d15f), UTF16.getTrailSurrogate(0x1d15f),
1259            0xc4,
1260            0x1ed0
1261        };
1262        int expect[]={
1263            0x831d,
1264            0x1d158, 0x1d165,
1265            0x41, 0x308,
1266            0x4f, 0x302, 0x301
1267        };
1268
1269        // expected src indexes corresponding to expect indexes
1270        int expectIndex[]={
1271            0,
1272            2, 2,
1273            4, 4,
1274            5, 5, 5,
1275            6 // behind last character
1276        };
1277
1278        // initial indexes into the src and expect strings
1279
1280        final int SRC_MIDDLE=4;
1281        final int EXPECT_MIDDLE=3;
1282
1283
1284        // movement vector
1285        // - for previous(), 0 for current(), + for next()
1286        // not const so that we can terminate it below for the error message
1287        String moves="0+0+0--0-0-+++0--+++++++0--------";
1288
1289        // iterators
1290        Normalizer iter = new Normalizer(new String(src),
1291                                                Normalizer.NFD,0);
1292        UCharIterator iter32 = new UCharIterator(expect, expect.length,
1293                                                     EXPECT_MIDDLE);
1294
1295        int c1, c2;
1296        char m;
1297
1298        // initially set the indexes into the middle of the strings
1299        iter.setIndexOnly(SRC_MIDDLE);
1300
1301        // move around and compare the iteration code points with
1302        // the expected ones
1303        int movesIndex =0;
1304        while(movesIndex<moves.length()) {
1305            m=moves.charAt(movesIndex++);
1306            if(m=='-') {
1307                c1=iter.previous();
1308                c2=iter32.previous();
1309            } else if(m=='0') {
1310                c1=iter.current();
1311                c2=iter32.current();
1312            } else /* m=='+' */ {
1313                c1=iter.next();
1314                c2=iter32.next();
1315            }
1316
1317            // compare results
1318            if(c1!=c2) {
1319                // copy the moves until the current (m) move, and terminate
1320                String history = moves.substring(0,movesIndex);
1321                errln("error: mismatch in Normalizer iteration at "+history+": "
1322                      +"got c1= " + hex(c1) +" != expected c2= "+ hex(c2));
1323                break;
1324            }
1325
1326            // compare indexes
1327            if(iter.getIndex()!=expectIndex[iter32.getIndex()]) {
1328                // copy the moves until the current (m) move, and terminate
1329                String history = moves.substring(0,movesIndex);
1330                errln("error: index mismatch in Normalizer iteration at "
1331                      +history+ " : "+ "Normalizer index " +iter.getIndex()
1332                      +" expected "+ expectIndex[iter32.getIndex()]);
1333                break;
1334            }
1335        }
1336    }
1337    // Only in ICU4j
1338    @Test
1339    public void TestPreviousNextJCI() {
1340        // src and expect strings
1341        char src[]={
1342            UTF16.getLeadSurrogate(0x2f999), UTF16.getTrailSurrogate(0x2f999),
1343            UTF16.getLeadSurrogate(0x1d15f), UTF16.getTrailSurrogate(0x1d15f),
1344            0xc4,
1345            0x1ed0
1346        };
1347        int expect[]={
1348            0x831d,
1349            0x1d158, 0x1d165,
1350            0x41, 0x308,
1351            0x4f, 0x302, 0x301
1352        };
1353
1354        // expected src indexes corresponding to expect indexes
1355        int expectIndex[]={
1356            0,
1357            2, 2,
1358            4, 4,
1359            5, 5, 5,
1360            6 // behind last character
1361        };
1362
1363        // initial indexes into the src and expect strings
1364
1365        final int SRC_MIDDLE=4;
1366        final int EXPECT_MIDDLE=3;
1367
1368
1369        // movement vector
1370        // - for previous(), 0 for current(), + for next()
1371        // not const so that we can terminate it below for the error message
1372        String moves="0+0+0--0-0-+++0--+++++++0--------";
1373
1374        // iterators
1375        StringCharacterIterator text = new StringCharacterIterator(new String(src));
1376        Normalizer iter = new Normalizer(text,Normalizer.NFD,0);
1377        UCharIterator iter32 = new UCharIterator(expect, expect.length,
1378                                                     EXPECT_MIDDLE);
1379
1380        int c1, c2;
1381        char m;
1382
1383        // initially set the indexes into the middle of the strings
1384        iter.setIndexOnly(SRC_MIDDLE);
1385
1386        // move around and compare the iteration code points with
1387        // the expected ones
1388        int movesIndex =0;
1389        while(movesIndex<moves.length()) {
1390            m=moves.charAt(movesIndex++);
1391            if(m=='-') {
1392                c1=iter.previous();
1393                c2=iter32.previous();
1394            } else if(m=='0') {
1395                c1=iter.current();
1396                c2=iter32.current();
1397            } else /* m=='+' */ {
1398                c1=iter.next();
1399                c2=iter32.next();
1400            }
1401
1402            // compare results
1403            if(c1!=c2) {
1404                // copy the moves until the current (m) move, and terminate
1405                String history = moves.substring(0,movesIndex);
1406                errln("error: mismatch in Normalizer iteration at "+history+": "
1407                      +"got c1= " + hex(c1) +" != expected c2= "+ hex(c2));
1408                break;
1409            }
1410
1411            // compare indexes
1412            if(iter.getIndex()!=expectIndex[iter32.getIndex()]) {
1413                // copy the moves until the current (m) move, and terminate
1414                String history = moves.substring(0,movesIndex);
1415                errln("error: index mismatch in Normalizer iteration at "
1416                      +history+ " : "+ "Normalizer index " +iter.getIndex()
1417                      +" expected "+ expectIndex[iter32.getIndex()]);
1418                break;
1419            }
1420        }
1421    }
1422
1423    // test APIs that are not otherwise used - improve test coverage
1424    @Test
1425    public void TestNormalizerAPI() throws Exception {
1426        try{
1427            // instantiate a Normalizer from a CharacterIterator
1428            String s=Utility.unescape("a\u0308\uac00\\U0002f800");
1429            // make s a bit longer and more interesting
1430            UCharacterIterator iter = UCharacterIterator.getInstance(s+s);
1431            Normalizer norm = new Normalizer(iter, Normalizer.NFC,0);
1432            if(norm.next()!=0xe4) {
1433                errln("error in Normalizer(CharacterIterator).next()");
1434            }
1435
1436            // test clone(), ==, and hashCode()
1437            Normalizer clone=(Normalizer)norm.clone();
1438            if(clone.equals(norm)) {
1439                errln("error in Normalizer(Normalizer(CharacterIterator)).clone()!=norm");
1440            }
1441
1442            if(clone.getLength()!= norm.getLength()){
1443               errln("error in Normalizer.getBeginIndex()");
1444            }
1445            // clone must have the same hashCode()
1446            //if(clone.hashCode()!=norm.hashCode()) {
1447            //    errln("error in Normalizer(Normalizer(CharacterIterator)).clone().hashCode()!=copy.hashCode()");
1448            //}
1449            if(clone.next()!=0xac00) {
1450                errln("error in Normalizer(Normalizer(CharacterIterator)).next()");
1451            }
1452            int ch = clone.next();
1453            if(ch!=0x4e3d) {
1454                errln("error in Normalizer(Normalizer(CharacterIterator)).clone().next()");
1455            }
1456            // position changed, must change hashCode()
1457            if(clone.hashCode()==norm.hashCode()) {
1458                errln("error in Normalizer(Normalizer(CharacterIterator)).clone().next().hashCode()==copy.hashCode()");
1459            }
1460
1461            // test compose() and decompose()
1462            StringBuffer tel;
1463            String nfkc, nfkd;
1464            tel=new StringBuffer("\u2121\u2121\u2121\u2121\u2121\u2121\u2121\u2121\u2121\u2121");
1465            tel.insert(1,(char)0x0301);
1466
1467            nfkc=Normalizer.compose(tel.toString(), true);
1468            nfkd=Normalizer.decompose(tel.toString(), true);
1469            if(
1470                !nfkc.equals(Utility.unescape("TE\u0139TELTELTELTELTELTELTELTELTEL"))||
1471                !nfkd.equals(Utility.unescape("TEL\u0301TELTELTELTELTELTELTELTELTEL"))
1472            ) {
1473                errln("error in Normalizer::(de)compose(): wrong result(s)");
1474            }
1475
1476            // test setIndex()
1477            ch=norm.setIndex(3);
1478            if(ch!=0x4e3d) {
1479               errln("error in Normalizer(CharacterIterator).setIndex(3)");
1480            }
1481
1482            // test setText(CharacterIterator) and getText()
1483            String out, out2;
1484            clone.setText(iter);
1485
1486            out = clone.getText();
1487            out2 = iter.getText();
1488            if( !out.equals(out2) ||
1489                clone.startIndex()!=0||
1490                clone.endIndex()!=iter.getLength()
1491            ) {
1492                errln("error in Normalizer::setText() or Normalizer::getText()");
1493            }
1494
1495            char[] fillIn1 = new char[clone.getLength()];
1496            char[] fillIn2 = new char[iter.getLength()];
1497            int len = clone.getText(fillIn1);
1498            iter.getText(fillIn2,0);
1499            if(!Utility.arrayRegionMatches(fillIn1,0,fillIn2,0,len)){
1500                errln("error in Normalizer.getText(). Normalizer: "+
1501                                Utility.hex(new String(fillIn1))+
1502                                " Iter: " + Utility.hex(new String(fillIn2)));
1503            }
1504
1505            clone.setText(fillIn1);
1506            len = clone.getText(fillIn2);
1507            if(!Utility.arrayRegionMatches(fillIn1,0,fillIn2,0,len)){
1508                errln("error in Normalizer.setText() or Normalizer.getText()"+
1509                                Utility.hex(new String(fillIn1))+
1510                                " Iter: " + Utility.hex(new String(fillIn2)));
1511            }
1512
1513            // test setText(UChar *), getUMode() and setMode()
1514            clone.setText(s);
1515            clone.setIndexOnly(1);
1516            clone.setMode(Normalizer.NFD);
1517            if(clone.getMode()!=Normalizer.NFD) {
1518                errln("error in Normalizer::setMode() or Normalizer::getMode()");
1519            }
1520            if(clone.next()!=0x308 || clone.next()!=0x1100) {
1521                errln("error in Normalizer::setText() or Normalizer::setMode()");
1522            }
1523
1524            // test last()/previous() with an internal buffer overflow
1525            StringBuffer buf = new StringBuffer("aaaaaaaaaa");
1526            buf.setCharAt(10-1,'\u0308');
1527            clone.setText(buf);
1528            if(clone.last()!=0x308) {
1529                errln("error in Normalizer(10*U+0308).last()");
1530            }
1531
1532            // test UNORM_NONE
1533            norm.setMode(Normalizer.NONE);
1534            if(norm.first()!=0x61 || norm.next()!=0x308 || norm.last()!=0x2f800) {
1535                errln("error in Normalizer(UNORM_NONE).first()/next()/last()");
1536            }
1537            out=Normalizer.normalize(s, Normalizer.NONE);
1538            if(!out.equals(s)) {
1539                errln("error in Normalizer::normalize(UNORM_NONE)");
1540            }
1541            ch = 0x1D15E;
1542            String exp = "\\U0001D157\\U0001D165";
1543            String ns = Normalizer.normalize(ch,Normalizer.NFC);
1544            if(!ns.equals(Utility.unescape(exp))){
1545                errln("error in Normalizer.normalize(int,Mode)");
1546            }
1547            ns = Normalizer.normalize(ch,Normalizer.NFC,0);
1548            if(!ns.equals(Utility.unescape(exp))){
1549                errln("error in Normalizer.normalize(int,Mode,int)");
1550            }
1551        }catch(Exception e){
1552            throw e;
1553        }
1554    }
1555
1556    @Test
1557    public void TestConcatenate() {
1558
1559        Object[][]cases=new Object[][]{
1560            /* mode, left, right, result */
1561            {
1562                Normalizer.NFC,
1563                "re",
1564                "\u0301sum\u00e9",
1565                "r\u00e9sum\u00e9"
1566            },
1567            {
1568                Normalizer.NFC,
1569                "a\u1100",
1570                "\u1161bcdefghijk",
1571                "a\uac00bcdefghijk"
1572            },
1573            /* ### TODO: add more interesting cases */
1574            {
1575                Normalizer.NFD,
1576                "\u03B1\u0345",
1577                "\u0C4D\uD804\uDCBA\uD834\uDD69",  // 0C4D 110BA 1D169
1578                "\u03B1\uD834\uDD69\uD804\uDCBA\u0C4D\u0345"  // 03B1 1D169 110BA 0C4D 0345
1579            }
1580        };
1581
1582        String left, right, expect, result;
1583        Normalizer.Mode mode;
1584        int i;
1585
1586        /* test concatenation */
1587        for(i=0; i<cases.length; ++i) {
1588            mode = (Normalizer.Mode)cases[i][0];
1589
1590            left=(String)cases[i][1];
1591            right=(String)cases[i][2];
1592            expect=(String)cases[i][3];
1593            {
1594                result=Normalizer.concatenate(left, right, mode,0);
1595                if(!result.equals(expect)) {
1596                    errln("error in Normalizer.concatenate(), cases[] failed"
1597                          +", result==expect: expected: "
1598                          + hex(expect)+" =========> got: " + hex(result));
1599                }
1600            }
1601            {
1602                result=Normalizer.concatenate(left.toCharArray(), right.toCharArray(), mode,0);
1603                if(!result.equals(expect)) {
1604                    errln("error in Normalizer.concatenate(), cases[] failed"
1605                          +", result==expect: expected: "
1606                          + hex(expect)+" =========> got: " + hex(result));
1607                }
1608            }
1609        }
1610
1611        mode= Normalizer.NFC; // (Normalizer.Mode)cases2[0][0];
1612        char[] destination = "My resume is here".toCharArray();
1613        left = "resume";
1614        right = "re\u0301sum\u00e9 is HERE";
1615        expect = "My r\u00e9sum\u00e9 is HERE";
1616
1617        // Concatenates 're' with '\u0301sum\u00e9 is HERE' and places the result at
1618        // position 3 of string 'My resume is here'.
1619        Normalizer.concatenate(left.toCharArray(), 0, 2, right.toCharArray(), 2, 15,
1620                                         destination, 3, 17, mode, 0);
1621        if(!String.valueOf(destination).equals(expect)) {
1622            errln("error in Normalizer.concatenate(), cases2[] failed"
1623                  +", result==expect: expected: "
1624                  + hex(expect) + " =========> got: " + hex(destination));
1625        }
1626
1627        // Error case when result of concatenation won't fit into destination array.
1628        try {
1629            Normalizer.concatenate(left.toCharArray(), 0, 2, right.toCharArray(), 2, 15,
1630                                         destination, 3, 16, mode, 0);
1631        } catch (IndexOutOfBoundsException e) {
1632            assertTrue("Normalizer.concatenate() failed", e.getMessage().equals("14"));
1633            return;
1634        }
1635        fail("Normalizer.concatenate() tested for failure but passed");
1636    }
1637
    // Bound passed to Random.nextInt() and used as the divisor when TestCheckFCD
    // scales a random value down to an index into its datachar table.
    // NOTE(review): name and value presumably mirror C's RAND_MAX from the C
    // version of this test - confirm.
    private final int RAND_MAX = 0x7fff;
1639
1640    @Test
1641    public void TestCheckFCD()
1642    {
1643      char[] FAST = {0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
1644                     0x0008, 0x0009, 0x000A};
1645
1646      char[] FALSE = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
1647                      0x02B9, 0x0314, 0x0315, 0x0316};
1648
1649      char[] TRUE = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
1650                     0x0050, 0x0730, 0x09EE, 0x1E10};
1651
1652      char[][] datastr= { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
1653                          {0x0061, 0x030A, 0x00E2, 0x0323, 0},
1654                          {0x0061, 0x0323, 0x00E2, 0x0323, 0},
1655                          {0x0061, 0x0323, 0x1E05, 0x0302, 0}
1656                        };
1657      Normalizer.QuickCheckResult result[] = {Normalizer.YES, Normalizer.NO, Normalizer.NO, Normalizer.YES};
1658
1659      char[] datachar= {        0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
1660                                0x6a,
1661                                0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
1662                                0xea,
1663                                0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
1664                                0x0307, 0x0308, 0x0309, 0x030a,
1665                                0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
1666                                0x0327, 0x0328, 0x0329, 0x032a,
1667                                0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
1668                                0x1e07, 0x1e08, 0x1e09, 0x1e0a
1669                       };
1670
1671      int count = 0;
1672
1673      if (Normalizer.quickCheck(FAST,0,FAST.length, Normalizer.FCD,0) != Normalizer.YES)
1674        errln("Normalizer.quickCheck(FCD) failed: expected value for fast Normalizer.quickCheck is Normalizer.YES\n");
1675      if (Normalizer.quickCheck(FALSE,0, FALSE.length,Normalizer.FCD,0) != Normalizer.NO)
1676        errln("Normalizer.quickCheck(FCD) failed: expected value for error Normalizer.quickCheck is Normalizer.NO\n");
1677      if (Normalizer.quickCheck(TRUE,0,TRUE.length,Normalizer.FCD,0) != Normalizer.YES)
1678        errln("Normalizer.quickCheck(FCD) failed: expected value for correct Normalizer.quickCheck is Normalizer.YES\n");
1679
1680
1681      while (count < 4)
1682      {
1683        Normalizer.QuickCheckResult fcdresult = Normalizer.quickCheck(datastr[count],0,datastr[count].length, Normalizer.FCD,0);
1684        if (result[count] != fcdresult) {
1685            errln("Normalizer.quickCheck(FCD) failed: Data set "+ count
1686                    + " expected value "+ result[count]);
1687        }
1688        count ++;
1689      }
1690
1691      /* random checks of long strings */
1692      //srand((unsigned)time( NULL ));
1693      Random rand = createRandom(); // use test framework's random
1694
1695      for (count = 0; count < 50; count ++)
1696      {
1697        int size = 0;
1698        Normalizer.QuickCheckResult testresult = Normalizer.YES;
1699        char[] data= new char[20];
1700        char[] norm= new char[100];
1701        char[] nfd = new char[100];
1702        int normStart = 0;
1703        int nfdsize = 0;
1704        while (size != 19) {
1705          data[size] = datachar[rand.nextInt(RAND_MAX)*50/RAND_MAX];
1706          logln("0x"+data[size]);
1707          normStart += Normalizer.normalize(data,size,size+1,
1708                                              norm,normStart,100,
1709                                              Normalizer.NFD,0);
1710          size ++;
1711        }
1712        logln("\n");
1713
1714        nfdsize = Normalizer.normalize(data,0,size, nfd,0,nfd.length,Normalizer.NFD,0);
1715        //    nfdsize = unorm_normalize(data, size, UNORM_NFD, UCOL_IGNORE_HANGUL,
1716        //                      nfd, 100, &status);
1717        if (nfdsize != normStart || Utility.arrayRegionMatches(nfd,0, norm,0,nfdsize) ==false) {
1718          testresult = Normalizer.NO;
1719        }
1720        if (testresult == Normalizer.YES) {
1721          logln("result Normalizer.YES\n");
1722        }
1723        else {
1724          logln("result Normalizer.NO\n");
1725        }
1726
1727        if (Normalizer.quickCheck(data,0,data.length, Normalizer.FCD,0) != testresult) {
1728          errln("Normalizer.quickCheck(FCD) failed: expected "+ testresult +" for random data: "+hex(new String(data)) );
1729        }
1730      }
1731    }
1732
1733
1734    // reference implementation of Normalizer::compare
1735    private int ref_norm_compare(String s1, String s2, int options) {
1736        String t1, t2,r1,r2;
1737
1738        int normOptions=options>>Normalizer.COMPARE_NORM_OPTIONS_SHIFT;
1739
1740        if((options&Normalizer.COMPARE_IGNORE_CASE)!=0) {
1741            // NFD(toCasefold(NFD(X))) = NFD(toCasefold(NFD(Y)))
1742            r1 = Normalizer.decompose(s1,false,normOptions);
1743            r2 = Normalizer.decompose(s2,false,normOptions);
1744            r1 = UCharacter.foldCase(r1,options);
1745            r2 = UCharacter.foldCase(r2,options);
1746        }else{
1747            r1 = s1;
1748            r2 = s2;
1749        }
1750
1751        t1 = Normalizer.decompose(r1, false, normOptions);
1752        t2 = Normalizer.decompose(r2, false, normOptions);
1753
1754        if((options&Normalizer.COMPARE_CODE_POINT_ORDER)!=0) {
1755            UTF16.StringComparator comp
1756                    = new UTF16.StringComparator(true, false,
1757                                     UTF16.StringComparator.FOLD_CASE_DEFAULT);
1758            return comp.compare(t1,t2);
1759        } else {
1760            return t1.compareTo(t2);
1761        }
1762
1763    }
1764
1765    // test wrapper for Normalizer::compare, sets UNORM_INPUT_IS_FCD appropriately
1766    private int norm_compare(String s1, String s2, int options) {
1767        int normOptions=options>>Normalizer.COMPARE_NORM_OPTIONS_SHIFT;
1768
1769        if( Normalizer.YES==Normalizer.quickCheck(s1,Normalizer.FCD,normOptions) &&
1770            Normalizer.YES==Normalizer.quickCheck(s2,Normalizer.FCD,normOptions)) {
1771            options|=Normalizer.INPUT_IS_FCD;
1772        }
1773
1774        int cmpStrings = Normalizer.compare(s1, s2, options);
1775        int cmpArrays = Normalizer.compare(
1776                s1.toCharArray(), 0, s1.length(),
1777                s2.toCharArray(), 0, s2.length(), options);
1778        assertEquals("compare strings == compare char arrays", cmpStrings, cmpArrays);
1779        return cmpStrings;
1780    }
1781
1782    // reference implementation of UnicodeString::caseCompare
1783    private int ref_case_compare(String s1, String s2, int options) {
1784        String t1, t2;
1785
1786        t1=s1;
1787        t2=s2;
1788
1789        t1 = UCharacter.foldCase(t1,((options&Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I)==0));
1790        t2 = UCharacter.foldCase(t2,((options&Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I)==0));
1791
1792        if((options&Normalizer.COMPARE_CODE_POINT_ORDER)!=0) {
1793            UTF16.StringComparator comp
1794                    = new UTF16.StringComparator(true, false,
1795                                    UTF16.StringComparator.FOLD_CASE_DEFAULT);
1796            return comp.compare(t1,t2);
1797        } else {
1798            return t1.compareTo(t2);
1799        }
1800
1801    }
1802
1803    // reduce an integer to -1/0/1
1804    private static int sign(int value) {
1805        if(value==0) {
1806            return 0;
1807        } else {
1808            return (value>>31)|1;
1809        }
1810    }
1811    private static String signString(int value) {
1812        if(value<0) {
1813            return "<0";
1814        } else if(value==0) {
1815            return "=0";
1816        } else /* value>0 */ {
1817            return ">0";
1818        }
1819    }
    // test Normalizer::compare and unorm_compare (thinly wrapped by the former)
    // by comparing it with its semantic equivalent
    // since we trust the pieces, this is sufficient

    // Input table for TestCompare and TestCompareDebug:
    // test each string with itself and each other
    // each time with all options
    //
    // The entries are passed through Utility.unescape() before use, so
    // supplementary code points may be written as a doubled backslash followed
    // by U and eight hex digits. The "m..n" comments give the array indexes of
    // the entries that follow them.
    private  String strings[]=new String[]{
                // some cases from NormalizationTest.txt
                // 0..3
                "D\u031B\u0307\u0323",
                "\u1E0C\u031B\u0307",
                "D\u031B\u0323\u0307",
                "d\u031B\u0323\u0307",

                // 4..6
                "\u00E4",
                "a\u0308",
                "A\u0308",

                // Angstrom sign = A ring
                // 7..10
                "\u212B",
                "\u00C5",
                "A\u030A",
                "a\u030A",

                // 11..14
                "a\u059A\u0316\u302A\u032Fb",
                "a\u302A\u0316\u032F\u059Ab",
                "a\u302A\u0316\u032F\u059Ab",
                "A\u059A\u0316\u302A\u032Fb",

                // from ICU case folding tests
                // 15..20
                "A\u00df\u00b5\ufb03\\U0001040c\u0131",
                "ass\u03bcffi\\U00010434i",
                "\u0061\u0042\u0131\u03a3\u00df\ufb03\ud93f\udfff",
                "\u0041\u0062\u0069\u03c3\u0073\u0053\u0046\u0066\u0049\ud93f\udfff",
                "\u0041\u0062\u0131\u03c3\u0053\u0073\u0066\u0046\u0069\ud93f\udfff",
                "\u0041\u0062\u0069\u03c3\u0073\u0053\u0046\u0066\u0049\ud93f\udffd",

                //     U+d800 U+10001   see implementation comment in unorm_cmpEquivFold
                // vs. U+10000          at bottom - code point order
                // 21..22
                "\ud800\ud800\udc01",
                "\ud800\udc00",

                // other code point order tests from ustrtest.cpp
                // 23..31
                "\u20ac\ud801",
                "\u20ac\ud800\udc00",
                "\ud800",
                "\ud800\uff61",
                "\udfff",
                "\uff61\udfff",
                "\uff61\ud800\udc02",
                "\ud800\udc02",
                "\ud84d\udc56",

                // long strings, see cnormtst.c/TestNormCoverage()
                // equivalent if case-insensitive
                // 32..33
                "\uAD8B\uAD8B\uAD8B\uAD8B"+
                "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "aaaaaaaaaaaaaaaaaazzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"+
                "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"+
                "ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"+
                "ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd"+
                "\uAD8B\uAD8B\uAD8B\uAD8B"+
                "d\u031B\u0307\u0323",

                "\u1100\u116f\u11aa\uAD8B\uAD8B\u1100\u116f\u11aa"+
                "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e"+
                "aaaaaaaaaaAAAAAAAAZZZZZZZZZZZZZZZZzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"+
                "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"+
                "ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"+
                "ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd"+
                "\u1100\u116f\u11aa\uAD8B\uAD8B\u1100\u116f\u11aa"+
                "\u1E0C\u031B\u0307",

                // some strings that may make a difference whether the compare function
                // case-folds or decomposes first
                // 34..41
                "\u0360\u0345\u0334",
                "\u0360\u03b9\u0334",

                "\u0360\u1f80\u0334",
                "\u0360\u03b1\u0313\u03b9\u0334",

                "\u0360\u1ffc\u0334",
                "\u0360\u03c9\u03b9\u0334",

                "a\u0360\u0345\u0360\u0345b",
                "a\u0345\u0360\u0345\u0360b",

                // interesting cases for canonical caseless match with turkic i handling
                // 42..43 (the pair that TestCompareDebug compares)
                "\u00cc",
                "\u0069\u0300",

                // strings with post-Unicode 3.2 normalization or normalization corrections
                // 44..45
                "\u00e4\u193b\\U0002f868",
                "\u0061\u193b\u0308\u36fc",


    };
1935
1936    // all combinations of options
1937    // UNORM_INPUT_IS_FCD is set automatically if both input strings fulfill FCD conditions
1938    final class Temp {
1939        int options;
1940        String name;
1941        public Temp(int opt,String str){
1942            options =opt;
1943            name = str;
1944        }
1945
1946    }
    // Option combinations that TestCompare runs every comparison with; each
    // entry carries the label printed in error messages. TestCompareDebug uses
    // only entry 2 ("ignore case").
    // set UNORM_UNICODE_3_2 in one additional combination

    private Temp[] opt = new Temp[]{
                    new Temp(0,"default"),
                    new Temp(Normalizer.COMPARE_CODE_POINT_ORDER, "code point order" ),
                    new Temp(Normalizer.COMPARE_IGNORE_CASE, "ignore case" ),
                    new Temp(Normalizer.COMPARE_CODE_POINT_ORDER|Normalizer.COMPARE_IGNORE_CASE, "code point order & ignore case" ),
                    new Temp(Normalizer.COMPARE_IGNORE_CASE|Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I, "ignore case & special i"),
                    new Temp(Normalizer.COMPARE_CODE_POINT_ORDER|Normalizer.COMPARE_IGNORE_CASE|Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I, "code point order & ignore case & special i"),
                    // the Unicode 3.2 flag is a normalization option, so it is shifted into the upper bits
                    new Temp(Normalizer.UNICODE_3_2 << Normalizer.COMPARE_NORM_OPTIONS_SHIFT, "Unicode 3.2")
            };
1958
1959
1960    @Test
1961    public void TestCompareDebug(){
1962
1963        String[] s = new String[100]; // at least as many items as in strings[] !
1964
1965
1966        int i, j, k, count=strings.length;
1967        int result, refResult;
1968
1969        // create the UnicodeStrings
1970        for(i=0; i<count; ++i) {
1971            s[i]=Utility.unescape(strings[i]);
1972        }
1973        UTF16.StringComparator comp = new UTF16.StringComparator(true, false,
1974                                     UTF16.StringComparator.FOLD_CASE_DEFAULT);
1975        // test them each with each other
1976
1977        i = 42;
1978        j = 43;
1979        k = 2;
1980        // test Normalizer::compare
1981        result=norm_compare(s[i], s[j], opt[k].options);
1982        refResult=ref_norm_compare(s[i], s[j], opt[k].options);
1983        if(sign(result)!=sign(refResult)) {
1984            errln("Normalizer::compare( " + i +", "+j + ", " +k+"( " +opt[k].name+"))=" + result +" should be same sign as " + refResult);
1985        }
1986
1987        // test UnicodeString::caseCompare - same internal implementation function
1988         if(0!=(opt[k].options&Normalizer.COMPARE_IGNORE_CASE)) {
1989        //    result=s[i]. (s[j], opt[k].options);
1990            if ((opt[k].options & Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I) == 0)
1991            {
1992                comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_DEFAULT);
1993            }
1994            else {
1995                comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_EXCLUDE_SPECIAL_I);
1996            }
1997
1998            result=comp.compare(s[i],s[j]);
1999            refResult=ref_case_compare(s[i], s[j], opt[k].options);
2000            if(sign(result)!=sign(refResult)) {
2001                      errln("Normalizer::compare( " + i +", "+j + ", "+k+"( " +opt[k].name+"))=" + result +" should be same sign as " + refResult);
2002                            }
2003        }
2004        String value1 = "\u00dater\u00fd";
2005        String value2 = "\u00fater\u00fd";
2006        if(Normalizer.compare(value1,value2,0)!=0){
2007            if(Normalizer.compare(value1,value2,Normalizer.COMPARE_IGNORE_CASE)==0){
2008
2009            }
2010        }
2011    }
2012
2013    @Test
2014    public void TestCompare() {
2015
2016        String[] s = new String[100]; // at least as many items as in strings[] !
2017
2018        int i, j, k, count=strings.length;
2019        int result, refResult;
2020
2021        // create the UnicodeStrings
2022        for(i=0; i<count; ++i) {
2023            s[i]=Utility.unescape(strings[i]);
2024        }
2025        UTF16.StringComparator comp = new UTF16.StringComparator();
2026        // test them each with each other
2027        for(i=0; i<count; ++i) {
2028            for(j=i; j<count; ++j) {
2029                for(k=0; k<opt.length; ++k) {
2030                    // test Normalizer::compare
2031                    result=norm_compare(s[i], s[j], opt[k].options);
2032                    refResult=ref_norm_compare(s[i], s[j], opt[k].options);
2033                    if(sign(result)!=sign(refResult)) {
2034                        errln("Normalizer::compare( " + i +", "+j + ", " +k+"( " +opt[k].name+"))=" + result +" should be same sign as " + refResult);
2035                    }
2036
2037                    // test UnicodeString::caseCompare - same internal implementation function
2038                     if(0!=(opt[k].options&Normalizer.COMPARE_IGNORE_CASE)) {
2039                        //    result=s[i]. (s[j], opt[k].options);
2040                        if ((opt[k].options & Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I) == 0)
2041                        {
2042                            comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_DEFAULT);
2043                        }
2044                        else {
2045                            comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_EXCLUDE_SPECIAL_I);
2046                        }
2047
2048                        comp.setCodePointCompare((opt[k].options & Normalizer.COMPARE_CODE_POINT_ORDER) != 0);
2049                        // result=comp.caseCompare(s[i],s[j], opt[k].options);
2050                        result=comp.compare(s[i],s[j]);
2051                        refResult=ref_case_compare(s[i], s[j], opt[k].options);
2052                        if(sign(result)!=sign(refResult)) {
2053                                  errln("Normalizer::compare( " + i +", "+j + ", "+k+"( " +opt[k].name+"))=" + result +" should be same sign as " + refResult);
2054                                         }
2055                    }
2056                }
2057            }
2058        }
2059
2060        // test cases with i and I to make sure Turkic works
2061        char[] iI= new char[]{ 0x49, 0x69, 0x130, 0x131 };
2062        UnicodeSet set = new UnicodeSet(), iSet = new UnicodeSet();
2063        Normalizer2Impl nfcImpl = Norm2AllModes.getNFCInstance().impl;
2064        nfcImpl.ensureCanonIterData();
2065
2066        String s1, s2;
2067
2068        // collect all sets into one for contiguous output
2069        for(i=0; i<iI.length; ++i) {
2070            if(nfcImpl.getCanonStartSet(iI[i], iSet)) {
2071                set.addAll(iSet);
2072            }
2073        }
2074
2075        // test all of these precomposed characters
2076        Normalizer2 nfcNorm2 = Normalizer2.getNFCInstance();
2077        UnicodeSetIterator it = new UnicodeSetIterator(set);
2078        int c;
2079        while(it.next() && (c=it.codepoint)!=UnicodeSetIterator.IS_STRING) {
2080            s1 = UTF16.valueOf(c);
2081            s2 = nfcNorm2.getDecomposition(c);
2082            for(k=0; k<opt.length; ++k) {
2083                // test Normalizer::compare
2084
2085                result= norm_compare(s1, s2, opt[k].options);
2086                refResult=ref_norm_compare(s1, s2, opt[k].options);
2087                if(sign(result)!=sign(refResult)) {
2088                    errln("Normalizer.compare(U+"+hex(c)+" with its NFD, "+opt[k].name+")"
2089                          + signString(result)+" should be "+signString(refResult));
2090                }
2091
2092                // test UnicodeString::caseCompare - same internal implementation function
2093                if((opt[k].options & Normalizer.COMPARE_IGNORE_CASE)>0) {
2094                     if ((opt[k].options & Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I) == 0)
2095                    {
2096                        comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_DEFAULT);
2097                    }
2098                    else {
2099                        comp.setIgnoreCase(true, UTF16.StringComparator.FOLD_CASE_EXCLUDE_SPECIAL_I);
2100                    }
2101
2102                    comp.setCodePointCompare((opt[k].options & Normalizer.COMPARE_CODE_POINT_ORDER) != 0);
2103
2104                    result=comp.compare(s1,s2);
2105                    refResult=ref_case_compare(s1, s2, opt[k].options);
2106                    if(sign(result)!=sign(refResult)) {
2107                        errln("UTF16.compare(U+"+hex(c)+" with its NFD, "
2108                              +opt[k].name+")"+signString(result) +" should be "+signString(refResult));
2109                    }
2110                }
2111            }
2112        }
2113
2114        // test getDecomposition() for some characters that do not decompose
2115        if( nfcNorm2.getDecomposition(0x20)!=null ||
2116            nfcNorm2.getDecomposition(0x4e00)!=null ||
2117            nfcNorm2.getDecomposition(0x20002)!=null
2118        ) {
2119            errln("NFC.getDecomposition() returns TRUE for characters which do not have decompositions");
2120        }
2121
2122        // test getRawDecomposition() for some characters that do not decompose
2123        if( nfcNorm2.getRawDecomposition(0x20)!=null ||
2124            nfcNorm2.getRawDecomposition(0x4e00)!=null ||
2125            nfcNorm2.getRawDecomposition(0x20002)!=null
2126        ) {
2127            errln("getRawDecomposition() returns TRUE for characters which do not have decompositions");
2128        }
2129
2130        // test composePair() for some pairs of characters that do not compose
2131        if( nfcNorm2.composePair(0x20, 0x301)>=0 ||
2132            nfcNorm2.composePair(0x61, 0x305)>=0 ||
2133            nfcNorm2.composePair(0x1100, 0x1160)>=0 ||
2134            nfcNorm2.composePair(0xac00, 0x11a7)>=0
2135        ) {
2136            errln("NFC.composePair() incorrectly composes some pairs of characters");
2137        }
2138
2139        // test FilteredNormalizer2.getDecomposition()
2140        UnicodeSet filter=new UnicodeSet("[^\u00a0-\u00ff]");
2141        FilteredNormalizer2 fn2=new FilteredNormalizer2(nfcNorm2, filter);
2142        if(fn2.getDecomposition(0xe4)!=null || !"A\u0304".equals(fn2.getDecomposition(0x100))) {
2143            errln("FilteredNormalizer2(NFC, ^A0-FF).getDecomposition() failed");
2144        }
2145
2146        // test FilteredNormalizer2.getRawDecomposition()
2147        if(fn2.getRawDecomposition(0xe4)!=null || !"A\u0304".equals(fn2.getRawDecomposition(0x100))) {
2148            errln("FilteredNormalizer2(NFC, ^A0-FF).getRawDecomposition() failed");
2149        }
2150
2151        // test FilteredNormalizer2::composePair()
2152        if( 0x100!=fn2.composePair(0x41, 0x304) ||
2153            fn2.composePair(0xc7, 0x301)>=0 // unfiltered result: U+1E08
2154        ) {
2155            errln("FilteredNormalizer2(NFC, ^A0-FF).composePair() failed");
2156        }
2157    }
2158
2159    // verify that case-folding does not un-FCD strings
2160    int countFoldFCDExceptions(int foldingOptions) {
2161        String s, d;
2162        int c;
2163        int count;
2164        int/*unsigned*/ cc, trailCC, foldCC, foldTrailCC;
2165        Normalizer.QuickCheckResult qcResult;
2166        int category;
2167        boolean isNFD;
2168
2169
2170        logln("Test if case folding may un-FCD a string (folding options 0x)"+hex(foldingOptions));
2171
2172        count=0;
2173        for(c=0; c<=0x10ffff; ++c) {
2174            category=UCharacter.getType(c);
2175            if(category==UCharacterCategory.UNASSIGNED) {
2176                continue; // skip unassigned code points
2177            }
2178            if(c==0xac00) {
2179                c=0xd7a3; // skip Hangul - no case folding there
2180                continue;
2181            }
2182            // skip Han blocks - no case folding there either
2183            if(c==0x3400) {
2184                c=0x4db5;
2185                continue;
2186            }
2187            if(c==0x4e00) {
2188                c=0x9fa5;
2189                continue;
2190            }
2191            if(c==0x20000) {
2192                c=0x2a6d6;
2193                continue;
2194            }
2195
2196            s= UTF16.valueOf(c);
2197
2198            // get leading and trailing cc for c
2199            d= Normalizer.decompose(s,false);
2200            isNFD= s==d;
2201            cc=UCharacter.getCombiningClass(UTF16.charAt(d,0));
2202            trailCC=UCharacter.getCombiningClass(UTF16.charAt(d,d.length()-1));
2203
2204            // get leading and trailing cc for the case-folding of c
2205            UCharacter.foldCase(s,(foldingOptions==0));
2206            d = Normalizer.decompose(s, false);
2207            foldCC=UCharacter.getCombiningClass(UTF16.charAt(d,0));
2208            foldTrailCC=UCharacter.getCombiningClass(UTF16.charAt(d,d.length()-1));
2209
2210            qcResult=Normalizer.quickCheck(s, Normalizer.FCD,0);
2211
2212
2213            // bad:
2214            // - character maps to empty string: adjacent characters may then need reordering
2215            // - folding has different leading/trailing cc's, and they don't become just 0
2216            // - folding itself is not FCD
2217            if( qcResult!=Normalizer.YES ||
2218                s.length()==0 ||
2219                (cc!=foldCC && foldCC!=0) || (trailCC!=foldTrailCC && foldTrailCC!=0)
2220            ) {
2221                ++count;
2222                errln("U+"+hex(c)+": case-folding may un-FCD a string (folding options 0x"+hex(foldingOptions)+")");
2223                //errln("  cc %02x trailCC %02x    foldCC(U+%04lx) %02x foldTrailCC(U+%04lx) %02x   quickCheck(folded)=%d", cc, trailCC, UTF16.charAt(d,0), foldCC, UTF16.charAt(d,d.length()-1), foldTrailCC, qcResult);
2224                continue;
2225            }
2226
2227            // also bad:
2228            // if a code point is in NFD but its case folding is not, then
2229            // unorm_compare will also fail
2230            if(isNFD && Normalizer.YES!=Normalizer.quickCheck(s, Normalizer.NFD,0)) {
2231                ++count;
2232                errln("U+"+hex(c)+": case-folding may un-FCD a string (folding options 0x"+hex(foldingOptions)+")");
2233            }
2234        }
2235
2236        logln("There are "+hex(count)+" code points for which case-folding may un-FCD a string (folding options"+foldingOptions+"x)" );
2237        return count;
2238    }
2239
2240    @Test
2241    public void TestFindFoldFCDExceptions() {
2242        int count;
2243
2244        count=countFoldFCDExceptions(0);
2245        count+=countFoldFCDExceptions(Normalizer.FOLD_CASE_EXCLUDE_SPECIAL_I);
2246        if(count>0) {
2247            //*
2248            //* If case-folding un-FCDs any strings, then unorm_compare() must be
2249            //* re-implemented.
2250            //* It currently assumes that one can check for FCD then case-fold
2251            //* and then still have FCD strings for raw decomposition without reordering.
2252            //*
2253            errln("error: There are "+count+" code points for which case-folding"+
2254                  " may un-FCD a string for all folding options.\n See comment"+
2255                  " in BasicNormalizerTest::FindFoldFCDExceptions()!");
2256        }
2257    }
2258
2259    @Test
2260    public void TestCombiningMarks(){
2261        String src = "\u0f71\u0f72\u0f73\u0f74\u0f75";
2262        String expected = "\u0F71\u0F71\u0F71\u0F72\u0F72\u0F74\u0F74";
2263        String result = Normalizer.decompose(src,false);
2264        if(!expected.equals(result)){
2265            errln("Reordering of combining marks failed. Expected: "+Utility.hex(expected)+" Got: "+ Utility.hex(result));
2266        }
2267    }
2268
2269    /*
2270     * Re-enable this test when UTC fixes UAX 21
2271    @Test
2272    public void TestUAX21Failure(){
2273        final String[][] cases = new String[][]{
2274                {"\u0061\u0345\u0360\u0345\u0062", "\u0061\u0360\u0345\u0345\u0062"},
2275                {"\u0061\u0345\u0345\u0360\u0062", "\u0061\u0360\u0345\u0345\u0062"},
2276                {"\u0061\u0345\u0360\u0362\u0360\u0062", "\u0061\u0362\u0360\u0360\u0345\u0062"},
2277                {"\u0061\u0360\u0345\u0360\u0362\u0062", "\u0061\u0362\u0360\u0360\u0345\u0062"},
2278                {"\u0061\u0345\u0360\u0362\u0361\u0062", "\u0061\u0362\u0360\u0361\u0345\u0062"},
2279                {"\u0061\u0361\u0345\u0360\u0362\u0062", "\u0061\u0362\u0361\u0360\u0345\u0062"},
2280        };
2281        for(int i = 0; i< cases.length; i++){
2282            String s1 =cases[0][0];
2283            String s2 = cases[0][1];
2284            if( (Normalizer.compare(s1,s2,Normalizer.FOLD_CASE_DEFAULT ==0)//case sensitive compare
2285                &&
2286                (Normalizer.compare(s1,s2,Normalizer.COMPARE_IGNORE_CASE)!=0)){
2287                errln("Normalizer.compare() failed for s1: "
2288                        + Utility.hex(s1) +" s2: " + Utility.hex(s2));
2289            }
2290        }
2291    }
2292    */
2293    @Test
2294    public void TestFCNFKCClosure() {
2295        final class TestStruct{
2296            int c;
2297            String s;
2298            TestStruct(int cp, String src){
2299                c=cp;
2300                s=src;
2301            }
2302        }
2303
2304        TestStruct[] tests= new TestStruct[]{
2305            new TestStruct( 0x00C4, "" ),
2306            new TestStruct( 0x00E4, "" ),
2307            new TestStruct( 0x037A, "\u0020\u03B9" ),
2308            new TestStruct( 0x03D2, "\u03C5" ),
2309            new TestStruct( 0x20A8, "\u0072\u0073" ) ,
2310            new TestStruct( 0x210B, "\u0068" ),
2311            new TestStruct( 0x210C, "\u0068" ),
2312            new TestStruct( 0x2121, "\u0074\u0065\u006C" ),
2313            new TestStruct( 0x2122, "\u0074\u006D" ),
2314            new TestStruct( 0x2128, "\u007A" ),
2315            new TestStruct( 0x1D5DB,"\u0068" ),
2316            new TestStruct( 0x1D5ED,"\u007A" ),
2317            new TestStruct( 0x0061, "" )
2318        };
2319
2320
2321        for(int i = 0; i < tests.length; ++ i) {
2322            String result=Normalizer.getFC_NFKC_Closure(tests[i].c);
2323            if(!result.equals(new String(tests[i].s))) {
2324                errln("getFC_NFKC_Closure(U+"+Integer.toHexString(tests[i].c)+") is wrong");
2325            }
2326        }
2327
2328        /* error handling */
2329
2330        int length=Normalizer.getFC_NFKC_Closure(0x5c, null);
2331        if(length!=0){
2332            errln("getFC_NFKC_Closure did not perform error handling correctly");
2333        }
2334    }
2335    @Test
2336    public void TestBugJ2324(){
2337       /* String[] input = new String[]{
2338                            //"\u30FD\u3099",
2339                            "\u30FA\u309A",
2340                            "\u30FB\u309A",
2341                            "\u30FC\u309A",
2342                            "\u30FE\u309A",
2343                            "\u30FD\u309A",
2344
2345        };*/
2346        String troublesome = "\u309A";
2347        for(int i=0x3000; i<0x3100;i++){
2348            String input = ((char)i)+troublesome;
2349            try{
2350              /*  String result =*/ Normalizer.compose(input,false);
2351            }catch(IndexOutOfBoundsException e){
2352                errln("compose() failed for input: " + Utility.hex(input) + " Exception: " + e.toString());
2353            }
2354        }
2355
2356    }
2357
    // Named indices; D, C, KD and KC select the NFD, NFC, NFKD and NFKC entries of the
    // UnicodeSet[] arrays in initSkippables() and TestSkippable().
    static final int D = 0, C = 1, KD= 2, KC = 3, FCD=4, NONE=5;
2359
2360    private static UnicodeSet[] initSkippables(UnicodeSet[] skipSets) {
2361        skipSets[D].applyPattern("[[:NFD_QC=Yes:]&[:ccc=0:]]", false);
2362        skipSets[C].applyPattern("[[:NFC_QC=Yes:]&[:ccc=0:]-[:HST=LV:]]", false);
2363        skipSets[KD].applyPattern("[[:NFKD_QC=Yes:]&[:ccc=0:]]", false);
2364        skipSets[KC].applyPattern("[[:NFKC_QC=Yes:]&[:ccc=0:]-[:HST=LV:]]", false);
2365
2366        // Remove from the NFC and NFKC sets all those characters that change
2367        // when a back-combining character is added.
2368        // First, get all of the back-combining characters and their combining classes.
2369        UnicodeSet combineBack=new UnicodeSet("[:NFC_QC=Maybe:]");
2370        int numCombineBack=combineBack.size();
2371        int[] combineBackCharsAndCc=new int[numCombineBack*2];
2372        UnicodeSetIterator iter=new UnicodeSetIterator(combineBack);
2373        for(int i=0; i<numCombineBack; ++i) {
2374            iter.next();
2375            int c=iter.codepoint;
2376            combineBackCharsAndCc[2*i]=c;
2377            combineBackCharsAndCc[2*i+1]=UCharacter.getCombiningClass(c);
2378        }
2379
2380        // We need not look at control codes, Han characters nor Hangul LVT syllables because they
2381        // do not combine forward. LV syllables are already removed.
2382        UnicodeSet notInteresting=new UnicodeSet("[[:C:][:Unified_Ideograph:][:HST=LVT:]]");
2383        UnicodeSet unsure=((UnicodeSet)(skipSets[C].clone())).removeAll(notInteresting);
2384        // System.out.format("unsure.size()=%d\n", unsure.size());
2385
2386        // For each character about which we are unsure, see if it changes when we add
2387        // one of the back-combining characters.
2388        Normalizer2 norm2=Normalizer2.getNFCInstance();
2389        StringBuilder s=new StringBuilder();
2390        iter.reset(unsure);
2391        while(iter.next()) {
2392            int c=iter.codepoint;
2393            s.delete(0, 0x7fffffff).appendCodePoint(c);
2394            int cLength=s.length();
2395            int tccc=UCharacter.getIntPropertyValue(c, UProperty.TRAIL_CANONICAL_COMBINING_CLASS);
2396            for(int i=0; i<numCombineBack; ++i) {
2397                // If c's decomposition ends with a character with non-zero combining class, then
2398                // c can only change if it combines with a character with a non-zero combining class.
2399                int cc2=combineBackCharsAndCc[2*i+1];
2400                if(tccc==0 || cc2!=0) {
2401                    int c2=combineBackCharsAndCc[2*i];
2402                    s.appendCodePoint(c2);
2403                    if(!norm2.isNormalized(s)) {
2404                        // System.out.format("remove U+%04x (tccc=%d) + U+%04x (cc=%d)\n", c, tccc, c2, cc2);
2405                        skipSets[C].remove(c);
2406                        skipSets[KC].remove(c);
2407                        break;
2408                    }
2409                    s.delete(cLength, 0x7fffffff);
2410                }
2411            }
2412        }
2413        return skipSets;
2414    }
2415
2416    @Test
2417    public void TestSkippable() {
2418        UnicodeSet[] skipSets = new UnicodeSet[] {
2419            new UnicodeSet(), //NFD
2420            new UnicodeSet(), //NFC
2421            new UnicodeSet(), //NFKD
2422            new UnicodeSet()  //NFKC
2423        };
2424        UnicodeSet[] expectSets = new UnicodeSet[] {
2425            new UnicodeSet(),
2426            new UnicodeSet(),
2427            new UnicodeSet(),
2428            new UnicodeSet()
2429        };
2430        StringBuilder s, pattern;
2431
2432        // build NF*Skippable sets from runtime data
2433        skipSets[D].applyPattern("[:NFD_Inert:]");
2434        skipSets[C].applyPattern("[:NFC_Inert:]");
2435        skipSets[KD].applyPattern("[:NFKD_Inert:]");
2436        skipSets[KC].applyPattern("[:NFKC_Inert:]");
2437
2438        expectSets = initSkippables(expectSets);
2439        if(expectSets[D].contains(0x0350)){
2440            errln("expectSets[D] contains 0x0350");
2441        }
2442        for(int i=0; i<expectSets.length; ++i) {
2443            if(!skipSets[i].equals(expectSets[i])) {
2444                errln("error: TestSkippable skipSets["+i+"]!=expectedSets["+i+"]\n");
2445                // Note: This used to depend on hardcoded UnicodeSet patterns generated by
2446                // Mark's unicodetools.com.ibm.text.UCD.NFSkippable, by
2447                // running com.ibm.text.UCD.Main with the option NFSkippable.
2448                // Since ICU 4.6/Unicode 6, we are generating the
2449                // expectSets ourselves in initSkippables().
2450
2451                s=new StringBuilder();
2452
2453                s.append("\n\nskip=       ");
2454                s.append(skipSets[i].toPattern(true));
2455                s.append("\n\n");
2456
2457                s.append("skip-expect=");
2458                pattern = new StringBuilder(((UnicodeSet)skipSets[i].clone()).removeAll(expectSets[i]).toPattern(true));
2459                s.append(pattern);
2460
2461                pattern.delete(0,pattern.length());
2462                s.append("\n\nexpect-skip=");
2463                pattern = new StringBuilder(((UnicodeSet)expectSets[i].clone()).removeAll(skipSets[i]).toPattern(true));
2464                s.append(pattern);
2465                s.append("\n\n");
2466
2467                pattern.delete(0,pattern.length());
2468                s.append("\n\nintersection(expect,skip)=");
2469                UnicodeSet intersection  = ((UnicodeSet) expectSets[i].clone()).retainAll(skipSets[i]);
2470                pattern = new StringBuilder(intersection.toPattern(true));
2471                s.append(pattern);
2472                // Special: test coverage for append(char).
2473                s.append('\n');
2474                s.append('\n');
2475
2476                errln(s.toString());
2477            }
2478        }
2479    }
2480
2481    @Test
2482    public void TestBugJ2068(){
2483        String sample = "The quick brown fox jumped over the lazy dog";
2484        UCharacterIterator text = UCharacterIterator.getInstance(sample);
2485        Normalizer norm = new Normalizer(text,Normalizer.NFC,0);
2486        text.setIndex(4);
2487        if(text.current() == norm.current()){
2488            errln("Normalizer is not cloning the UCharacterIterator");
2489        }
2490     }
2491    @Test
2492     public void TestGetCombiningClass(){
2493        for(int i=0;i<0x10FFFF;i++){
2494            int cc = UCharacter.getCombiningClass(i);
2495            if(0xD800<= i && i<=0xDFFF && cc >0 ){
2496                cc = UCharacter.getCombiningClass(i);
2497                errln("CC: "+ cc + " for codepoint: " +Utility.hex(i,8));
2498            }
2499        }
2500    }
2501
2502    @Test
2503    public void TestSerializedSet(){
2504        USerializedSet sset=new USerializedSet();
2505        UnicodeSet set = new UnicodeSet();
2506        int start, end;
2507
2508        char[] serialized = {
2509            0x8007,  // length
2510            3,  // bmpLength
2511            0xc0, 0xfe, 0xfffc,
2512            1, 9, 0x10, 0xfffc
2513        };
2514        sset.getSet(serialized, 0);
2515
2516        // collect all sets into one for contiguous output
2517        int[] startEnd = new int[2];
2518        int count=sset.countRanges();
2519        for(int j=0; j<count; ++j) {
2520            sset.getRange(j, startEnd);
2521            set.add(startEnd[0], startEnd[1]);
2522        }
2523
2524        // test all of these characters
2525        UnicodeSetIterator it = new UnicodeSetIterator(set);
2526        while(it.nextRange() && it.codepoint!=UnicodeSetIterator.IS_STRING) {
2527            start=it.codepoint;
2528            end=it.codepointEnd;
2529            while(start<=end) {
2530                if(!sset.contains(start)){
2531                    errln("USerializedSet.contains failed for "+Utility.hex(start,8));
2532                }
2533                ++start;
2534            }
2535        }
2536    }
2537
2538    @Test
2539    public void TestReturnFailure(){
2540        char[] term = {'r','\u00e9','s','u','m','\u00e9' };
2541        char[] decomposed_term = new char[10 + term.length + 2];
2542        int rc = Normalizer.decompose(term,0,term.length, decomposed_term,0,decomposed_term.length,true, 0);
2543        int rc1 = Normalizer.decompose(term,0,term.length, decomposed_term,10,decomposed_term.length,true, 0);
2544        if(rc!=rc1){
2545            errln("Normalizer decompose did not return correct length");
2546        }
2547    }
2548
2549    private final static class TestCompositionCase {
2550        public Normalizer.Mode mode;
2551        public int options;
2552        public String input, expect;
2553        TestCompositionCase(Normalizer.Mode mode, int options, String input, String expect) {
2554            this.mode=mode;
2555            this.options=options;
2556            this.input=input;
2557            this.expect=expect;
2558        }
2559    }
2560
2561    @Test
2562    public void TestComposition() {
2563        final TestCompositionCase cases[]=new TestCompositionCase[]{
2564            /*
2565             * special cases for UAX #15 bug
2566             * see Unicode Corrigendum #5: Normalization Idempotency
2567             * at http://unicode.org/versions/corrigendum5.html
2568             * (was Public Review Issue #29)
2569             */
2570            new TestCompositionCase(Normalizer.NFC, 0, "\u1100\u0300\u1161\u0327",      "\u1100\u0300\u1161\u0327"),
2571            new TestCompositionCase(Normalizer.NFC, 0, "\u1100\u0300\u1161\u0327\u11a8","\u1100\u0300\u1161\u0327\u11a8"),
2572            new TestCompositionCase(Normalizer.NFC, 0, "\uac00\u0300\u0327\u11a8",      "\uac00\u0327\u0300\u11a8"),
2573            new TestCompositionCase(Normalizer.NFC, 0, "\u0b47\u0300\u0b3e",            "\u0b47\u0300\u0b3e"),
2574
2575            /* TODO: add test cases for UNORM_FCC here (j2151) */
2576        };
2577
2578        String output;
2579        int i;
2580
2581        for(i=0; i<cases.length; ++i) {
2582            output=Normalizer.normalize(cases[i].input, cases[i].mode, cases[i].options);
2583            if(!output.equals(cases[i].expect)) {
2584                errln("unexpected result for case "+i);
2585            }
2586        }
2587    }
2588
2589    @Test
2590    public void TestGetDecomposition() {
2591        Normalizer2 n2=Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.COMPOSE_CONTIGUOUS);
2592        String decomp=n2.getDecomposition(0x20);
2593        assertEquals("fcc.getDecomposition(space) failed", null, decomp);
2594        decomp=n2.getDecomposition(0xe4);
2595        assertEquals("fcc.getDecomposition(a-umlaut) failed", "a\u0308", decomp);
2596        decomp=n2.getDecomposition(0xac01);
2597        assertEquals("fcc.getDecomposition(Hangul syllable U+AC01) failed", "\u1100\u1161\u11a8", decomp);
2598    }
2599
2600    @Test
2601    public void TestGetRawDecomposition() {
2602        Normalizer2 n2=Normalizer2.getNFKCInstance();
2603        /*
2604         * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
2605         * without recursive decomposition.
2606         */
2607
2608        String decomp=n2.getRawDecomposition(0x20);
2609        assertEquals("nfkc.getRawDecomposition(space) failed", null, decomp);
2610        decomp=n2.getRawDecomposition(0xe4);
2611        assertEquals("nfkc.getRawDecomposition(a-umlaut) failed", "a\u0308", decomp);
2612        /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
2613        decomp=n2.getRawDecomposition(0x1e08);
2614        assertEquals("nfkc.getRawDecomposition(c-cedilla-acute) failed", "\u00c7\u0301", decomp);
2615        /* U+212B ANGSTROM SIGN */
2616        decomp=n2.getRawDecomposition(0x212b);
2617        assertEquals("nfkc.getRawDecomposition(angstrom sign) failed", "\u00c5", decomp);
2618        decomp=n2.getRawDecomposition(0xac00);
2619        assertEquals("nfkc.getRawDecomposition(Hangul syllable U+AC00) failed", "\u1100\u1161", decomp);
2620        /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */
2621        decomp=n2.getRawDecomposition(0xac01);
2622        assertEquals("nfkc.getRawDecomposition(Hangul syllable U+AC01) failed", "\uac00\u11a8", decomp);
2623    }
2624
2625    @Test
2626    public void TestCustomComp() {
2627        String [][] pairs={
2628            { "\\uD801\\uE000\\uDFFE", "" },
2629            { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD7FF\\uFFFF" },
2630            { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD7FF\\U000107FE\\uFFFF" },
2631            { "\\uE001\\U000110B9\\u0345\\u0308\\u0327", "\\uE002\\U000110B9\\u0327\\u0345" },
2632            { "\\uE010\\U000F0011\\uE012", "\\uE011\\uE012" },
2633            { "\\uE010\\U000F0011\\U000F0011\\uE012", "\\uE011\\U000F0010" },
2634            { "\\uE111\\u1161\\uE112\\u1162", "\\uAE4C\\u1102\\u0062\\u1162" },
2635            { "\\uFFF3\\uFFF7\\U00010036\\U00010077", "\\U00010037\\U00010037\\uFFF6\\U00010037" }
2636        };
2637        Normalizer2 customNorm2;
2638        customNorm2=
2639            Normalizer2.getInstance(
2640                BasicTest.class.getResourceAsStream("/android/icu/dev/data/testdata/testnorm.nrm"),
2641                "testnorm",
2642                Normalizer2.Mode.COMPOSE);
2643        for(int i=0; i<pairs.length; ++i) {
2644            String[] pair=pairs[i];
2645            String input=Utility.unescape(pair[0]);
2646            String expected=Utility.unescape(pair[1]);
2647            String result=customNorm2.normalize(input);
2648            if(!result.equals(expected)) {
2649                errln("custom compose Normalizer2 did not normalize input "+i+" as expected");
2650            }
2651        }
2652    }
2653
2654    @Test
2655    public void TestCustomFCC() {
2656        String[][] pairs={
2657            { "\\uD801\\uE000\\uDFFE", "" },
2658            { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD7FF\\uFFFF" },
2659            { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD7FF\\U000107FE\\uFFFF" },
2660            // The following expected result is different from CustomComp
2661            // because of only-contiguous composition.
2662            { "\\uE001\\U000110B9\\u0345\\u0308\\u0327", "\\uE001\\U000110B9\\u0327\\u0308\\u0345" },
2663            { "\\uE010\\U000F0011\\uE012", "\\uE011\\uE012" },
2664            { "\\uE010\\U000F0011\\U000F0011\\uE012", "\\uE011\\U000F0010" },
2665            { "\\uE111\\u1161\\uE112\\u1162", "\\uAE4C\\u1102\\u0062\\u1162" },
2666            { "\\uFFF3\\uFFF7\\U00010036\\U00010077", "\\U00010037\\U00010037\\uFFF6\\U00010037" }
2667        };
2668        Normalizer2 customNorm2;
2669        customNorm2=
2670            Normalizer2.getInstance(
2671                BasicTest.class.getResourceAsStream("/android/icu/dev/data/testdata/testnorm.nrm"),
2672                "testnorm",
2673                Normalizer2.Mode.COMPOSE_CONTIGUOUS);
2674        for(int i=0; i<pairs.length; ++i) {
2675            String[] pair=pairs[i];
2676            String input=Utility.unescape(pair[0]);
2677            String expected=Utility.unescape(pair[1]);
2678            String result=customNorm2.normalize(input);
2679            if(!result.equals(expected)) {
2680                errln("custom FCC Normalizer2 did not normalize input "+i+" as expected");
2681            }
2682        }
2683    }
2684
2685    @Test
2686    public void TestCanonIterData() {
2687        // For now, just a regression test.
2688        Normalizer2Impl impl=Norm2AllModes.getNFCInstance().impl.ensureCanonIterData();
2689        // U+0FB5 TIBETAN SUBJOINED LETTER SSA is the trailing character
2690        // in some decomposition mappings where there is a composition exclusion.
2691        // In fact, U+0FB5 is normalization-inert (NFC_QC=Yes, NFD_QC=Yes, ccc=0)
2692        // but it is not a segment starter because it occurs in a decomposition mapping.
2693        if(impl.isCanonSegmentStarter(0xfb5)) {
2694            errln("isCanonSegmentStarter(U+0fb5)=true is wrong");
2695        }
2696        // For [:Segment_Starter:] to work right, not just the property function has to work right,
2697        // UnicodeSet also needs a correct range starts set.
2698        UnicodeSet segStarters=new UnicodeSet("[:Segment_Starter:]").freeze();
2699        if(segStarters.contains(0xfb5)) {
2700            errln("[:Segment_Starter:].contains(U+0fb5)=true is wrong");
2701        }
2702        // Try characters up to Kana and miscellaneous CJK but below Han (for expediency).
2703        for(int c=0; c<=0x33ff; ++c) {
2704            boolean isStarter=impl.isCanonSegmentStarter(c);
2705            boolean isContained=segStarters.contains(c);
2706            if(isStarter!=isContained) {
2707                errln(String.format(
2708                        "discrepancy: isCanonSegmentStarter(U+%04x)=%5b != " +
2709                        "[:Segment_Starter:].contains(same)",
2710                        c, isStarter));
2711            }
2712        }
2713    }
2714
2715    @Test
2716    public void TestFilteredNormalizer2() {
2717        Normalizer2 nfcNorm2=Normalizer2.getNFCInstance();
2718        UnicodeSet filter=new UnicodeSet("[^\u00a0-\u00ff\u0310-\u031f]");
2719        FilteredNormalizer2 fn2=new FilteredNormalizer2(nfcNorm2, filter);
2720        int c;
2721        for(c=0; c<=0x3ff; ++c) {
2722            int expectedCC= filter.contains(c) ? nfcNorm2.getCombiningClass(c) : 0;
2723            int cc=fn2.getCombiningClass(c);
2724            assertEquals(
2725                    "FilteredNormalizer2(NFC, ^A0-FF,310-31F).getCombiningClass(U+"+hex(c)+
2726                    ")==filtered NFC.getCC()",
2727                    expectedCC, cc);
2728        }
2729
2730        // More coverage.
2731        StringBuilder sb=new StringBuilder();
2732        assertEquals("filtered normalize()", "ää\u0304",
2733                fn2.normalize("a\u0308ä\u0304", (Appendable)sb).toString());
2734        assertTrue("filtered hasBoundaryAfter()", fn2.hasBoundaryAfter('ä'));
2735        assertTrue("filtered isInert()", fn2.isInert(0x0313));
2736    }
2737
2738    @Test
2739    public void TestFilteredAppend() {
2740        Normalizer2 nfcNorm2=Normalizer2.getNFCInstance();
2741        UnicodeSet filter=new UnicodeSet("[^\u00a0-\u00ff\u0310-\u031f]");
2742        FilteredNormalizer2 fn2=new FilteredNormalizer2(nfcNorm2, filter);
2743
2744        // Append two strings that each contain a character outside the filter set.
2745        StringBuilder sb = new StringBuilder("a\u0313a");
2746        String second = "\u0301\u0313";
2747        assertEquals("append()", "a\u0313á\u0313", fn2.append(sb, second).toString());
2748
2749        // Same, and also normalize the second string.
2750        sb.replace(0, 0x7fffffff, "a\u0313a");
2751        assertEquals(
2752            "normalizeSecondAndAppend()",
2753            "a\u0313á\u0313", fn2.normalizeSecondAndAppend(sb, second).toString());
2754
2755        // Normalizer2.normalize(String) uses spanQuickCheckYes() and normalizeSecondAndAppend().
2756        assertEquals("normalize()", "a\u0313á\u0313", fn2.normalize("a\u0313a\u0301\u0313"));
2757    }
2758
2759    @Test
2760    public void TestGetEasyToUseInstance() {
2761        // Test input string:
2762        // U+00A0 -> <noBreak> 0020
2763        // U+00C7 0301 = 1E08 = 0043 0327 0301
2764        String in="\u00A0\u00C7\u0301";
2765        Normalizer2 n2=Normalizer2.getNFCInstance();
2766        String out=n2.normalize(in);
2767        assertEquals(
2768                "getNFCInstance() did not return an NFC instance " +
2769                "(normalizes to " + prettify(out) + ')',
2770                "\u00A0\u1E08", out);
2771
2772        n2=Normalizer2.getNFDInstance();
2773        out=n2.normalize(in);
2774        assertEquals(
2775                "getNFDInstance() did not return an NFD instance " +
2776                "(normalizes to " + prettify(out) + ')',
2777                "\u00A0C\u0327\u0301", out);
2778
2779        n2=Normalizer2.getNFKCInstance();
2780        out=n2.normalize(in);
2781        assertEquals(
2782                "getNFKCInstance() did not return an NFKC instance " +
2783                "(normalizes to " + prettify(out) + ')',
2784                " \u1E08", out);
2785
2786        n2=Normalizer2.getNFKDInstance();
2787        out=n2.normalize(in);
2788        assertEquals(
2789                "getNFKDInstance() did not return an NFKD instance " +
2790                "(normalizes to " + prettify(out) + ')',
2791                " C\u0327\u0301", out);
2792
2793        n2=Normalizer2.getNFKCCasefoldInstance();
2794        out=n2.normalize(in);
2795        assertEquals(
2796                "getNFKCCasefoldInstance() did not return an NFKC_Casefold instance " +
2797                "(normalizes to " + prettify(out) + ')',
2798                " \u1E09", out);
2799    }
2800
2801    @Test
2802    public void TestNFC() {
2803        // Coverage tests.
2804        Normalizer2 nfc = Normalizer2.getNFCInstance();
2805        assertTrue("nfc.hasBoundaryAfter(space)", nfc.hasBoundaryAfter(' '));
2806        assertFalse("nfc.hasBoundaryAfter(ä)", nfc.hasBoundaryAfter('ä'));
2807    }
2808
2809    @Test
2810    public void TestNFD() {
2811        // Coverage tests.
2812        Normalizer2 nfd = Normalizer2.getNFDInstance();
2813        assertTrue("nfd.hasBoundaryAfter(space)", nfd.hasBoundaryAfter(' '));
2814        assertFalse("nfd.hasBoundaryAfter(ä)", nfd.hasBoundaryAfter('ä'));
2815    }
2816
2817    @Test
2818    public void TestFCD() {
2819        // Coverage tests.
2820        Normalizer2 fcd = Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.FCD);
2821        assertTrue("fcd.hasBoundaryAfter(space)", fcd.hasBoundaryAfter(' '));
2822        assertFalse("fcd.hasBoundaryAfter(ä)", fcd.hasBoundaryAfter('ä'));
2823        assertTrue("fcd.isInert(space)", fcd.isInert(' '));
2824        assertFalse("fcd.isInert(ä)", fcd.isInert('ä'));
2825
2826        // This implementation method is unreachable via public API.
2827        Norm2AllModes.FCDNormalizer2 impl = (Norm2AllModes.FCDNormalizer2)fcd;
2828        assertEquals("fcd impl.getQuickCheck(space)", 1, impl.getQuickCheck(' '));
2829        assertEquals("fcd impl.getQuickCheck(ä)", 0, impl.getQuickCheck('ä'));
2830    }
2831
2832    @Test
2833    public void TestNoneNormalizer() {
2834        // Use the deprecated Mode Normalizer.NONE for coverage of the internal NoopNormalizer2
2835        // as far as its methods are reachable that way.
2836        assertEquals("NONE.concatenate()", "ä\u0327",
2837                Normalizer.concatenate("ä", "\u0327", Normalizer.NONE, 0));
2838        assertTrue("NONE.isNormalized()", Normalizer.isNormalized("ä\u0327", Normalizer.NONE, 0));
2839    }
2840
2841    @Test
2842    public void TestNoopNormalizer2() {
2843        // Use the internal class directly for coverage of methods that are not publicly reachable.
2844        Normalizer2 noop = Norm2AllModes.NOOP_NORMALIZER2;
2845        assertEquals("noop.normalizeSecondAndAppend()", "ä\u0327",
2846                noop.normalizeSecondAndAppend(new StringBuilder("ä"), "\u0327").toString());
2847        assertEquals("noop.getDecomposition()", null, noop.getDecomposition('ä'));
2848        assertTrue("noop.hasBoundaryAfter()", noop.hasBoundaryAfter(0x0308));
2849        assertTrue("noop.isInert()", noop.isInert(0x0308));
2850    }
2851
2852    /*
2853     * This unit test covers two 'get' methods in class Normalizer2Impl. It only tests that
2854     * an object is returned.
2855     */
2856    @Test
2857    public void TestGetsFromImpl() {
2858       Normalizer2Impl nfcImpl = Norm2AllModes.getNFCInstance().impl;
2859       assertNotEquals("getNormTrie() returns null", null, nfcImpl.getNormTrie());
2860       assertNotEquals("getFCD16FromBelow180() returns null", null,
2861                       nfcImpl.getFCD16FromBelow180(0));
2862    }
2863
2864    /*
2865     * Abstract class Normalizer2 has non-abstract methods which are overwritten by
2866     * its derived classes. To test these methods a derived class is defined here.
2867     */
2868    public class TestNormalizer2 extends Normalizer2 {
2869
2870        public TestNormalizer2() {}
2871        @Override
2872        public StringBuilder normalize(CharSequence src, StringBuilder dest) { return null; }
2873        @Override
2874        public Appendable normalize(CharSequence src, Appendable dest) { return null; }
2875        @Override
2876        public StringBuilder normalizeSecondAndAppend(
2877            StringBuilder first, CharSequence second) { return null; }
2878        @Override
2879        public StringBuilder append(StringBuilder first, CharSequence second) { return null; }
2880        @Override
2881        public String getDecomposition(int c) { return null; }
2882        @Override
2883        public boolean isNormalized(CharSequence s) { return false; }
2884        @Override
2885        public Normalizer.QuickCheckResult quickCheck(CharSequence s) { return null; }
2886        @Override
2887        public int spanQuickCheckYes(CharSequence s) { return 0; }
2888        @Override
2889        public boolean hasBoundaryBefore(int c) { return false; }
2890        @Override
2891        public boolean hasBoundaryAfter(int c) { return false; }
2892        @Override
2893        public boolean isInert(int c) { return false; }
2894    }
2895
2896    final TestNormalizer2 tnorm2 = new TestNormalizer2();
2897    @Test
2898    public void TestGetRawDecompositionBase() {
2899        int c = 'à';
2900        assertEquals("Unexpected value returned from Normalizer2.getRawDecomposition()",
2901                     null, tnorm2.getRawDecomposition(c));
2902    }
2903
2904    @Test
2905    public void TestComposePairBase() {
2906        int a = 'a';
2907        int b = '\u0300';
2908        assertEquals("Unexpected value returned from Normalizer2.composePair()",
2909                     -1, tnorm2.composePair(a, b));
2910    }
2911
2912    @Test
2913    public void TestGetCombiningClassBase() {
2914        int c = '\u00e0';
2915        assertEquals("Unexpected value returned from Normalizer2.getCombiningClass()",
2916                     0, tnorm2.getCombiningClass(c));
2917    }
2918}
2919