1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/*
4 *******************************************************************************
5 * Copyright (C) 2008-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 */
9package com.ibm.icu.dev.test.collator;
10import java.util.ArrayList;
11import java.util.Arrays;
12import java.util.Collection;
13import java.util.Iterator;
14import java.util.LinkedHashSet;
15import java.util.List;
16import java.util.Locale;
17import java.util.Set;
18import java.util.TreeSet;
19
20import org.junit.Test;
21
22import com.ibm.icu.dev.test.TestFmwk;
23import com.ibm.icu.dev.util.CollectionUtilities;
24import com.ibm.icu.impl.ICUDebug;
25import com.ibm.icu.impl.Row;
26import com.ibm.icu.impl.Row.R4;
27import com.ibm.icu.lang.UCharacter;
28import com.ibm.icu.lang.UProperty;
29import com.ibm.icu.lang.UScript;
30import com.ibm.icu.text.AlphabeticIndex;
31import com.ibm.icu.text.AlphabeticIndex.Bucket;
32import com.ibm.icu.text.AlphabeticIndex.Bucket.LabelType;
33import com.ibm.icu.text.AlphabeticIndex.ImmutableIndex;
34import com.ibm.icu.text.AlphabeticIndex.Record;
35import com.ibm.icu.text.Collator;
36import com.ibm.icu.text.Normalizer2;
37import com.ibm.icu.text.RawCollationKey;
38import com.ibm.icu.text.RuleBasedCollator;
39import com.ibm.icu.text.UTF16;
40import com.ibm.icu.text.UnicodeSet;
41import com.ibm.icu.util.ULocale;
42
43/**
44 * @author Mark Davis
45 */
46public class AlphabeticIndexTest extends TestFmwk {
    /**
     * Separator (U+2192, rightwards arrow) written between an item's name and its data
     * when index contents are logged.
     */
    private static final String ARROW = "\u2192";
    /** Result of {@code ICUDebug.enabled("alphabeticindex")}, evaluated once at class load. */
    private static final boolean DEBUG = ICUDebug.enabled("alphabeticindex");
52
53    public static Set<String> KEY_LOCALES = new LinkedHashSet(Arrays.asList(
54            "en", "es", "de", "fr", "ja", "it", "tr", "pt", "zh", "nl",
55            "pl", "ar", "ru", "zh_Hant", "ko", "th", "sv", "fi", "da",
56            "he", "nb", "el", "hr", "bg", "sk", "lt", "vi", "lv", "sr",
57            "pt_PT", "ro", "hu", "cs", "id", "sl", "fil", "fa", "uk",
58            "ca", "hi", "et", "eu", "is", "sw", "ms", "bn", "am", "ta",
59            "te", "mr", "ur", "ml", "kn", "gu", "or"));
60    private String[][] localeAndIndexCharactersLists = new String[][] {
61            /* Arabic*/ {"ar", "\u0627:\u0628:\u062A:\u062B:\u062C:\u062D:\u062E:\u062F:\u0630:\u0631:\u0632:\u0633:\u0634:\u0635:\u0636:\u0637:\u0638:\u0639:\u063A:\u0641:\u0642:\u0643:\u0644:\u0645:\u0646:\u0647:\u0648:\u064A"},
62            /* Bulgarian*/  {"bg", "\u0410:\u0411:\u0412:\u0413:\u0414:\u0415:\u0416:\u0417:\u0418:\u0419:\u041A:\u041B:\u041C:\u041D:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u0428:\u0429:\u042E:\u042F"},
63            /* Catalan*/    {"ca", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
64            /* Czech*/  {"cs", "A:B:C:\u010C:D:E:F:G:H:CH:I:J:K:L:M:N:O:P:Q:R:\u0158:S:\u0160:T:U:V:W:X:Y:Z:\u017D"},
65            /* Danish*/ {"da", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C6:\u00D8:\u00C5"},
66            /* German*/ {"de", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
67            /* Greek*/  {"el", "\u0391:\u0392:\u0393:\u0394:\u0395:\u0396:\u0397:\u0398:\u0399:\u039A:\u039B:\u039C:\u039D:\u039E:\u039F:\u03A0:\u03A1:\u03A3:\u03A4:\u03A5:\u03A6:\u03A7:\u03A8:\u03A9"},
68            /* English*/    {"en", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
69            /* Spanish*/    {"es", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:\u00D1:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
70            /* Estonian*/   {"et", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:\u0160:Z:\u017D:T:U:V:\u00D5:\u00C4:\u00D6:\u00DC:X:Y"},
71            /* Basque*/ {"eu", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
72            /* Finnish*/    {"fi", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C5:\u00C4:\u00D6"},
73            /* Filipino*/   {"fil", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:\u00D1:Ng:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
74            /* French*/ {"fr", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
75            /* Hebrew*/ {"he", "\u05D0:\u05D1:\u05D2:\u05D3:\u05D4:\u05D5:\u05D6:\u05D7:\u05D8:\u05D9:\u05DB:\u05DC:\u05DE:\u05E0:\u05E1:\u05E2:\u05E4:\u05E6:\u05E7:\u05E8:\u05E9:\u05EA"},
76            /* Icelandic*/  {"is", "A:\u00C1:B:C:D:\u00D0:E:\u00C9:F:G:H:I:\u00CD:J:K:L:M:N:O:\u00D3:P:Q:R:S:T:U:\u00DA:V:W:X:Y:\u00DD:Z:\u00DE:\u00C6:\u00D6"},
77            /* Italian*/    {"it", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
78            /* Japanese*/   {"ja", "\u3042:\u304B:\u3055:\u305F:\u306A:\u306F:\u307E:\u3084:\u3089:\u308F"},
79            /* Korean*/ {"ko", "\u3131:\u3134:\u3137:\u3139:\u3141:\u3142:\u3145:\u3147:\u3148:\u314A:\u314B:\u314C:\u314D:\u314E"},
80            /* Lithuanian*/ {"lt", "A:B:C:\u010C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:\u0160:T:U:V:Z:\u017D"},
81            /* Latvian*/    {"lv", "A:B:C:\u010C:D:E:F:G:\u0122:H:I:J:K:\u0136:L:\u013B:M:N:\u0145:O:P:Q:R:S:\u0160:T:U:V:W:X:Z:\u017D"},
82            /* Norwegian Bokm\u00E5l*/  {"nb", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C6:\u00D8:\u00C5"},
83            /* Dutch*/  {"nl", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
84            /* Polish*/ {"pl", "A:\u0104:B:C:\u0106:D:E:\u0118:F:G:H:I:J:K:L:\u0141:M:N:\u0143:O:\u00D3:P:Q:R:S:\u015A:T:U:V:W:X:Y:Z:\u0179:\u017B"},
85            /* Portuguese*/ {"pt", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
86            /* Romanian*/   {"ro", "A:\u0102:\u00C2:B:C:D:E:F:G:H:I:\u00CE:J:K:L:M:N:O:P:Q:R:S:\u0218:T:\u021A:U:V:W:X:Y:Z"},
87            /* Russian*/    {"ru", "\u0410:\u0411:\u0412:\u0413:\u0414:\u0415:\u0416:\u0417:\u0418:\u0419:\u041A:\u041B:\u041C:\u041D:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u0428:\u0429:\u042B:\u042D:\u042E:\u042F"},
88            /* Slovak*/ {"sk", "A:\u00C4:B:C:\u010C:D:E:F:G:H:CH:I:J:K:L:M:N:O:\u00D4:P:Q:R:S:\u0160:T:U:V:W:X:Y:Z:\u017D"},
89            /* Slovenian*/  {"sl", "A:B:C:\u010C:\u0106:D:\u0110:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:\u0160:T:U:V:W:X:Y:Z:\u017D"},
90            /* Serbian*/    {"sr", "\u0410:\u0411:\u0412:\u0413:\u0414:\u0402:\u0415:\u0416:\u0417:\u0418:\u0408:\u041A:\u041B:\u0409:\u041C:\u041D:\u040A:\u041E:\u041F:\u0420:\u0421:\u0422:\u040B:\u0423:\u0424:\u0425:\u0426:\u0427:\u040F:\u0428"},
91            /* Swedish*/    {"sv", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C5:\u00C4:\u00D6"},
92            /* Turkish*/    {"tr", "A:B:C:\u00C7:D:E:F:G:H:I:\u0130:J:K:L:M:N:O:\u00D6:P:Q:R:S:\u015E:T:U:\u00DC:V:W:X:Y:Z"},
93            /* Ukrainian*/  {"uk", "\u0410:\u0411:\u0412:\u0413:\u0490:\u0414:\u0415:\u0404:\u0416:\u0417:\u0418:\u0406:\u0407:\u0419:\u041A:\u041B:\u041C:\u041D:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u0428:\u0429:\u042E:\u042F"},
94            /* Vietnamese*/ {"vi", "A:\u0102:\u00C2:B:C:D:\u0110:E:\u00CA:F:G:H:I:J:K:L:M:N:O:\u00D4:\u01A0:P:Q:R:S:T:U:\u01AF:V:W:X:Y:Z"},
95            /* Chinese*/    {"zh", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
96            /* Chinese (Traditional Han)*/  {"zh_Hant", "1\u5283:2\u5283:3\u5283:4\u5283:5\u5283:6\u5283:7\u5283:8\u5283:9\u5283:10\u5283:11\u5283:12\u5283:13\u5283:14\u5283:15\u5283:16\u5283:17\u5283:18\u5283:19\u5283:20\u5283:21\u5283:22\u5283:23\u5283:24\u5283:25\u5283:26\u5283:27\u5283:28\u5283:29\u5283:30\u5283:31\u5283:32\u5283:33\u5283:35\u5283:36\u5283:39\u5283:48\u5283"},
97
98            // Comment these out to make the test run faster. Later, make these run under extended
99
100            //            /* Afrikaans*/  {"af", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
101            //            /* Akan*/   {"ak", "A:B:C:D:E:\u0190:F:G:H:I:J:K:L:M:N:O:\u0186:P:Q:R:S:T:U:V:W:X:Y:Z"},
102            //            /* Asu*/    {"asa", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
103            //            /* Azerbaijani*/    {"az", "A:B:C:\u00C7:D:E:\u018F:F:G:\u011E:H:X:I:\u0130:J:K:Q:L:M:N:O:\u00D6:P:R:S:\u015E:T:U:\u00DC:V:W:Y:Z"},
104            //            /* Belarusian*/ {"be", "\u0410:\u0411:\u0412:\u0413:\u0414:\u0415:\u0416:\u0417:\u0406:\u0419:\u041A:\u041B:\u041C:\u041D:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u0428:\u042B:\u042D:\u042E:\u042F"},
105            //            /* Bemba*/  {"bem", "A:B:C:E:F:G:I:J:K:L:M:N:O:P:S:T:U:W:Y"},
106            //            /* Bena*/   {"bez", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:Y:Z"},
107            //            /* Bambara*/    {"bm", "A:B:C:D:E:\u0190:F:G:H:I:J:K:L:M:N:\u019D:\u014A:O:\u0186:P:R:S:T:U:W:Y:Z"},
108            //            /* Tibetan*/    {"bo", "\u0F40:\u0F41:\u0F42:\u0F44:\u0F45:\u0F46:\u0F47:\u0F49:\u0F4F:\u0F50:\u0F51:\u0F53:\u0F54:\u0F55:\u0F56:\u0F58:\u0F59:\u0F5A:\u0F5B:\u0F5D:\u0F5E:\u0F5F:\u0F60:\u0F61:\u0F62:\u0F63:\u0F64:\u0F66:\u0F67:\u0F68"},
109            //            /* Chiga*/  {"cgg", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
110            //            /* Cherokee*/   {"chr", "\u13A0:\u13A6:\u13AD:\u13B3:\u13B9:\u13BE:\u13C6:\u13CC:\u13D3:\u13DC:\u13E3:\u13E9:\u13EF"},
111            //            /* Welsh*/  {"cy", "A:B:C:CH:D:E:F:FF:G:H:I:J:L:LL:M:N:O:P:PH:R:RH:S:T:TH:U:W:Y"},
112            //            /* Taita*/  {"dav", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
113            //            /* Embu*/   {"ebu", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
114            //            /* Ewe*/    {"ee", "A:B:C:D:\u0189:E:\u0190:F:\u0191:G:\u0194:H:I:J:K:L:M:N:\u014A:O:\u0186:P:Q:R:S:T:U:V:\u01B2:W:X:Y:Z"},
115            //            /* Esperanto*/  {"eo", "A:B:C:\u0108:D:E:F:G:\u011C:H:\u0124:I:J:\u0134:K:L:M:N:O:P:R:S:\u015C:T:U:\u016C:V:Z"},
116            //            /* Fulah*/  {"ff", "A:B:\u0181:C:D:\u018A:E:F:G:H:I:J:K:L:M:N:\u014A:O:P:R:S:T:U:W:Y:\u01B3"},
117            //            /* Faroese*/    {"fo", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C6:\u00D8"},
118            //            /* Gusii*/  {"guz", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
119            //            /* Hausa*/  {"ha", "A:B:\u0181:C:D:\u018A:E:F:G:H:I:J:K:\u0198:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
120            //            /* Igbo*/   {"ig", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
121            //            /* Machame*/    {"jmc", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
122            //            /* Kabyle*/ {"kab", "A:B:C:D:E:\u0190:F:G:\u0194:H:I:J:K:L:M:N:P:Q:R:S:T:U:W:X:Y:Z"},
123            //            /* Kamba*/  {"kam", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
124            //            /* Makonde*/    {"kde", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
125            //            /* Kabuverdianu*/   {"kea", "A:B:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:X:Z"},
126            //            /* Koyra Chiini*/   {"khq", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:\u019D:\u014A:O:P:Q:R:S:T:U:W:X:Y:Z"},
127            //            /* Kikuyu*/ {"ki", "A:B:C:D:E:G:H:I:J:K:M:N:O:R:T:U:W:Y"},
128            //            /* Kalenjin*/   {"kln", "A:B:C:D:E:G:H:I:J:K:L:M:N:O:P:R:S:T:U:W:Y"},
129            //            /* Langi*/  {"lag", "A:B:C:D:E:F:G:H:I:\u0197:J:K:L:M:N:O:P:Q:R:S:T:U:\u0244:V:W:X:Y:Z"},
130            //            /* Ganda*/  {"lg", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
131            //            /* Luo*/    {"luo", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y"},
132            //            /* Luyia*/  {"luy", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
133            //            /* Masai*/  {"mas", "A:B:C:D:E:\u0190:G:H:I:\u0197:J:K:L:M:N:\u014A:O:\u0186:P:R:S:T:U:\u0244:W:Y"},
134            //            /* Meru*/   {"mer", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
135            //            /* Morisyen*/   {"mfe", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:X:Y:Z"},
136            //            /* Malagasy*/   {"mg", "A:B:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:V:Y:Z"},
137            // This should be the correct data.  Commented till it is fixed in CLDR collation data.
138            // {"mk", "\u0410:\u0411:\u0412:\u0413:\u0403:\u0414:\u0415:\u0416:\u0417:\u0405:\u0418:\u0408:\u041A:\u040C:\u041B:\u0409:\u041C:\u041D:\u040A:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u040F:\u0428"},
139            //            /* Macedonian*/ {"mk", "\u0410:\u0411:\u0412:\u0413:\u0414:\u0403:\u0415:\u0416:\u0417:\u0405:\u0418:\u0408:\u041A:\u041B:\u0409:\u041C:\u041D:\u040A:\u041E:\u041F:\u0420:\u0421:\u0422:\u040C:\u0423:\u0424:\u0425:\u0426:\u0427:\u040F:\u0428"},
140            // This should be the correct data.  Commented till it is fixed in CLDR collation data.
141            // {"mt", "A:B:C:\u010A:D:E:F:\u0120:G:G\u0126:H:\u0126:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:\u017B:Z"},
142            //            /* Maltese*/    {"mt", "A:B:\u010A:C:D:E:F:\u0120:G:G\u0126:H:\u0126:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:\u017B:Z"},
143            //            /* Nama*/   {"naq", "A:B:C:D:E:F:G:H:I:K:M:N:O:P:Q:R:S:T:U:W:X:Y:Z"},
144            //            /* North Ndebele*/  {"nd", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:S:T:U:V:W:X:Y:Z"},
145            //            /* Norwegian Nynorsk*/  {"nn", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C6:\u00D8:\u00C5"},
146            //            /* Nyankole*/   {"nyn", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
147            //            /* Oromo*/  {"om", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
148            //            /* Romansh*/    {"rm", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
149            //            /* Rombo*/  {"rof", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
150            //            /* Kinyarwanda*/    {"rw", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
151            //            /* Rwa*/    {"rwk", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
152            //            /* Samburu*/    {"saq", "A:B:C:D:E:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y"},
153            //            /* Sena*/   {"seh", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
154            //            /* Koyraboro Senni*/    {"ses", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:\u019D:\u014A:O:P:Q:R:S:T:U:W:X:Y:Z"},
155            //            /* Sango*/  {"sg", "A:B:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
156            //            /* Tachelhit*/  {"shi", "A:B:C:D:E:\u0190:F:G:\u0194:H:I:J:K:L:M:N:Q:R:S:T:U:W:X:Y:Z"},
157            //            /* Tachelhit (Tifinagh)*/   {"shi_Tfng", "\u2D30:\u2D31:\u2D33:\u2D37:\u2D39:\u2D3B:\u2D3C:\u2D3D:\u2D40:\u2D43:\u2D44:\u2D45:\u2D47:\u2D49:\u2D4A:\u2D4D:\u2D4E:\u2D4F:\u2D53:\u2D54:\u2D55:\u2D56:\u2D59:\u2D5A:\u2D5B:\u2D5C:\u2D5F:\u2D61:\u2D62:\u2D63:\u2D65"},
158            //            /* Shona*/  {"sn", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
159            //            /* Teso*/   {"teo", "A:B:C:D:E:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:X:Y"},
160            //            /* Tonga*/  {"to", "A:B:C:D:E:F:G:H:\u02BB:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
161            //            /* Central Morocco Tamazight*/  {"tzm", "A:B:C:D:E:\u0190:F:G:\u0194:H:I:J:K:L:M:N:Q:R:S:T:U:W:X:Y:Z"},
162            //            /* Uzbek (Latin)*/  {"uz_Latn", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u02BF"},
163            //            /* Vunjo*/  {"vun", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
164            //            /* Soga*/   {"xog", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
165            //            /* Yoruba*/ {"yo", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
166
167    };
168
169//    public void TestAAKeyword() {
170//    ICUResourceBundle rb = (ICUResourceBundle) UResourceBundle.getBundleInstance(
171//            ICUResourceBundle.ICU_COLLATION_BASE_NAME, "zh");
172//    showBundle(rb, 0);
173//        String[] keywords = Collator.getKeywords();
174//        System.out.println(Arrays.asList(keywords));
175//        String locale = "zh";
176//        ULocale ulocale = new ULocale(locale);
177//        for (String keyword : keywords) {
178//            List<String> values = Arrays.asList(Collator.getKeywordValuesForLocale(keyword, ulocale, false));
179//            List<String> allValues = Arrays.asList(Collator.getKeywordValues(keyword));
180//            for (String value : allValues) {
181//                System.out.println(keyword + "=" + value);
182//                checkKeyword(locale, value, values.contains(value));
183//            }
184//        }
185//    }
186//
187//    private void checkKeyword(String locale, String collationValue, boolean shouldExist) {
188//        final ULocale base = new ULocale(locale);
189//        final ULocale desired = new ULocale(locale + "@collation=" + collationValue);
190//        Collator foo = Collator.getInstance(desired);
191//        ULocale actual = foo.getLocale(ULocale.ACTUAL_LOCALE);
192//        if (shouldExist) {
193//            assertEquals("actual should match desired", desired, actual);
194//        } else {
195//            assertEquals("actual should match base", base, actual);
196//        }
197//        int comp = foo.compare("a", "ā");
198//        assertEquals("should fall back to default for zh", -1, comp);
199//    }
200//
201//    /**
202//     * @param rb
203//     * @param i
204//     */
205//    private static void showBundle(UResourceBundle rb, int i) {
206//        for (String key : rb.keySet()) {
207//            System.out.print("\n" + Utility.repeat("  ", i) + key);
208//            UResourceBundle rb2 = rb.get(key);
209//            showBundle(rb2, i+1);
210//        }
211//    }
212
213
214    @Test
215    public void TestA() {
216        String[][] tests = {{"zh_Hant", "渡辺", "12劃"},
217                {"zh", "渡辺", "D"}
218                /*, "zh@collation=unihan", "ja@collation=unihan", "ko@collation=unihan"*/
219                };
220        for (String[] test : tests) {
221            AlphabeticIndex<Integer> alphabeticIndex = new AlphabeticIndex<Integer>(new ULocale(test[0]));
222            final String probe = test[1];
223            final String expectedLabel = test[2];
224            alphabeticIndex.addRecord(probe, 1);
225            List labels = alphabeticIndex.getBucketLabels();
226            logln(labels.toString());
227            Bucket<Integer> bucket = find(alphabeticIndex, probe);
228            assertEquals("locale " + test[0] + " name=" + probe + " in bucket",
229                    expectedLabel, bucket.getLabel());
230        }
231    }
232
233    private Bucket<Integer> find(AlphabeticIndex<Integer> alphabeticIndex, final String probe) {
234        for (Bucket<Integer> bucket : alphabeticIndex) {
235            for (Record<Integer> record : bucket) {
236                if (record.getName().equals(probe)) {
237                    return bucket;
238                }
239            }
240        }
241        return null;
242    }
243
244    @Test
245    public void TestFirstCharacters() {
246
247        AlphabeticIndex alphabeticIndex = new AlphabeticIndex(Locale.ENGLISH);
248        RuleBasedCollator collator = alphabeticIndex.getCollator();
249        collator.setStrength(Collator.IDENTICAL);
250        Collection<String> firsts = alphabeticIndex.getFirstCharactersInScripts();
251        // Verify that each script is represented exactly once.
252        // Exclude pseudo-scripts like Common (no letters).
253        // Exclude scripts like Braille and Sutton SignWriting
254        // because they only have symbols, not letters.
255        UnicodeSet missingScripts = new UnicodeSet(
256                "[^[:inherited:][:unknown:][:common:][:Braille:][:SignWriting:]]");
257        String last = "";
258        for (String index : firsts) {
259            if (collator.compare(last,index) >= 0) {
260                errln("Characters not in order: " + last + " !< " + index);
261            }
262            int script = getFirstRealScript(index);
263            if (script == UScript.UNKNOWN) { continue; }
264            UnicodeSet s = new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, script);
265            if (missingScripts.containsNone(s)) {
266                errln("2nd character in script: " + index + "\t" + new UnicodeSet(missingScripts).retainAll(s).toPattern(false));
267            }
268            missingScripts.removeAll(s);
269        }
270        if (missingScripts.size() != 0) {
271            String missingScriptNames = "";
272            UnicodeSet missingChars = new UnicodeSet(missingScripts);
273            for(;;) {
274                int c = missingChars.charAt(0);
275                if (c < 0) {
276                    break;
277                }
278                int script = UScript.getScript(c);
279                missingScriptNames += " " +
280                        UCharacter.getPropertyValueName(
281                                UProperty.SCRIPT, script, UProperty.NameChoice.SHORT);
282                missingChars.removeAll(new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, script));
283            }
284            errln("Missing character from:" + missingScriptNames + " -- " + missingScripts);
285        }
286    }
287
288    private static final int getFirstRealScript(CharSequence s) {
289        for (int i = 0; i < s.length();) {
290            int c = Character.codePointAt(s, i);
291            int script = UScript.getScript(c);
292            if (script != UScript.UNKNOWN && script != UScript.INHERITED && script != UScript.COMMON) {
293                return script;
294            }
295            i += Character.charCount(c);
296        }
297        return UScript.UNKNOWN;
298    }
299
300    @Test
301    public void TestBuckets() {
302        ULocale additionalLocale = ULocale.ENGLISH;
303
304        for (String[] pair : localeAndIndexCharactersLists) {
305            checkBuckets(pair[0], SimpleTests, additionalLocale, "E", "edgar", "Effron", "Effron");
306        }
307    }
308
309    @Test
310    public void TestEmpty() {
311        // just verify that it doesn't blow up.
312        Set<ULocale> locales = new LinkedHashSet<ULocale>();
313        locales.add(ULocale.ROOT);
314        locales.addAll(Arrays.asList(ULocale.getAvailableLocales()));
315        for (ULocale locale : locales) {
316            try {
317                AlphabeticIndex<String> alphabeticIndex = new AlphabeticIndex(locale);
318                alphabeticIndex.addRecord("hi", "HI");
319                for (Bucket<String> bucket : alphabeticIndex) {
320                    @SuppressWarnings("unused")
321                    LabelType labelType = bucket.getLabelType();
322                }
323            } catch (Exception e) {
324                errln("Exception when creating AlphabeticIndex for:\t" + locale.toLanguageTag());
325                errln(e.toString());
326            }
327        }
328    }
329
330    @Test
331    public void TestSetGetSpecialLabels() {
332        AlphabeticIndex index = new AlphabeticIndex(Locale.GERMAN).addLabels(new Locale("ru"));
333        index.setUnderflowLabel("__");
334        index.setInflowLabel("--");
335        index.setOverflowLabel("^^");
336        assertEquals("underflow label", "__", index.getUnderflowLabel());
337        assertEquals("inflow label", "--", index.getInflowLabel());
338        assertEquals("overflow label", "^^", index.getOverflowLabel());
339
340        ImmutableIndex ii = index.buildImmutableIndex();
341        assertEquals("0 -> underflow", "__", ii.getBucket(ii.getBucketIndex("0")).getLabel());
342        assertEquals("Ω -> inflow", "--", ii.getBucket(ii.getBucketIndex("Ω")).getLabel());
343        assertEquals("字 -> overflow", "^^", ii.getBucket(ii.getBucketIndex("字")).getLabel());
344    }
345
346    @Test
347    public void TestInflow() {
348        Object[][] tests = {
349                {0, ULocale.ENGLISH},
350                {0, ULocale.ENGLISH, new ULocale("el")},
351                {1, ULocale.ENGLISH, new ULocale("ru")},
352                {0, ULocale.ENGLISH, new ULocale("el"), new UnicodeSet("[\u2C80]"), new ULocale("ru")},
353                {0, ULocale.ENGLISH},
354                {2, ULocale.ENGLISH, new ULocale("ru"), ULocale.JAPANESE},
355        };
356        for (Object[] test : tests) {
357            int expected = (Integer) test[0];
358            AlphabeticIndex<Double> alphabeticIndex = new AlphabeticIndex((ULocale)test[1]);
359            for (int i = 2; i < test.length; ++i) {
360                if (test[i] instanceof ULocale) {
361                    alphabeticIndex.addLabels((ULocale)test[i]);
362                } else {
363                    alphabeticIndex.addLabels((UnicodeSet)test[i]);
364                }
365            }
366            Counter<AlphabeticIndex.Bucket.LabelType> counter = new Counter();
367            for (Bucket<Double> bucket : alphabeticIndex) {
368                LabelType labelType = bucket.getLabelType();
369                counter.add(labelType, 1);
370            }
371            String printList = Arrays.asList(test).toString();
372            assertEquals(LabelType.UNDERFLOW + "\t" + printList, 1, counter.get(LabelType.UNDERFLOW));
373            assertEquals(LabelType.INFLOW + "\t" + printList, expected, counter.get(LabelType.INFLOW));
374            if (expected != counter.get(LabelType.INFLOW)) {
375                // for debugging
376                AlphabeticIndex<Double> indexCharacters2 = new AlphabeticIndex((ULocale)test[1]);
377                for (int i = 2; i < test.length; ++i) {
378                    if (test[i] instanceof ULocale) {
379                        indexCharacters2.addLabels((ULocale)test[i]);
380                    } else {
381                        indexCharacters2.addLabels((UnicodeSet)test[i]);
382                    }
383                }
384                List<Bucket<Double>> buckets = CollectionUtilities.addAll(alphabeticIndex.iterator(), new ArrayList<Bucket<Double>>());
385                logln(buckets.toString());
386            }
387            assertEquals(LabelType.OVERFLOW + "\t" + printList, 1, counter.get(LabelType.OVERFLOW));
388        }
389    }
390
391    private void checkBuckets(String localeString, String[] test, ULocale additionalLocale, String testBucket, String... items) {
392        StringBuilder UI = new StringBuilder();
393        ULocale desiredLocale = new ULocale(localeString);
394
395        // Create a simple index where the values for the strings are Integers, and add the strings
396        AlphabeticIndex<Integer> index = new AlphabeticIndex<Integer>(desiredLocale).addLabels(additionalLocale);
397        int counter = 0;
398        Counter<String> itemCount = new Counter();
399        for (String item : test) {
400            index.addRecord(item, counter++);
401            itemCount.add(item, 1);
402        }
403        assertEquals("getRecordCount()", (int)itemCount.getTotal(), index.getRecordCount());  // code coverage
404
405        List<String> labels = index.getBucketLabels();
406        ImmutableIndex<Integer> immIndex = index.buildImmutableIndex();
407
408        logln(desiredLocale + "\t" + desiredLocale.getDisplayName(ULocale.ENGLISH) + " - " + desiredLocale.getDisplayName(desiredLocale) + "\t"
409                + index.getCollator().getLocale(ULocale.ACTUAL_LOCALE));
410        UI.setLength(0);
411        UI.append(desiredLocale + "\t");
412        boolean showAll = true;
413
414        // Show index at top. We could skip or gray out empty buckets
415        for (AlphabeticIndex.Bucket<Integer> bucket : index) {
416            if (showAll || bucket.size() != 0) {
417                showLabelAtTop(UI, bucket.getLabel());
418            }
419        }
420        logln(UI.toString());
421
422        // Show the buckets with their contents, skipping empty buckets
423        int bucketIndex = 0;
424        for (Bucket<Integer> bucket : index) {
425            assertEquals("bucket label vs. iterator",
426                    labels.get(bucketIndex), bucket.getLabel());
427            assertEquals("bucket label vs. immutable",
428                    labels.get(bucketIndex), immIndex.getBucket(bucketIndex).getLabel());
429            assertEquals("bucket label type vs. immutable",
430                    bucket.getLabelType(), immIndex.getBucket(bucketIndex).getLabelType());
431            for (Record<Integer> r : bucket) {
432                CharSequence name = r.getName();
433                assertEquals("getBucketIndex(" + name + ")",
434                        bucketIndex, index.getBucketIndex(name));
435                assertEquals("immutable getBucketIndex(" + name + ")",
436                        bucketIndex, immIndex.getBucketIndex(name));
437            }
438            if (bucket.getLabel().equals(testBucket)) {
439                Counter<String> keys = getKeys(bucket);
440                for (String item : items) {
441                    long globalCount = itemCount.get(item);
442                    long localeCount = keys.get(item);
443                    if (globalCount != localeCount) {
444                        errln("Error: in " + "'" + testBucket + "', '" + item + "' should have count "
445                                + globalCount + " but has count " + localeCount);
446                    }
447
448                }
449            }
450
451            if (bucket.size() != 0) {
452                showLabelInList(UI, bucket.getLabel());
453                for (AlphabeticIndex.Record<Integer> item : bucket) {
454                    showIndexedItem(UI, item.getName(), item.getData());
455                }
456                logln(UI.toString());
457            }
458            ++bucketIndex;
459        }
460        assertEquals("getBucketCount()", bucketIndex, index.getBucketCount());
461        assertEquals("immutable getBucketCount()", bucketIndex, immIndex.getBucketCount());
462
463        assertNull("immutable getBucket(-1)", immIndex.getBucket(-1));
464        assertNull("immutable getBucket(count)", immIndex.getBucket(bucketIndex));
465
466        for (Bucket<Integer> bucket : immIndex) {
467            assertEquals("immutable bucket size", 0, bucket.size());
468            assertFalse("immutable bucket iterator.hasNext()", bucket.iterator().hasNext());
469        }
470    }
471
472    public <T> void showIndex(AlphabeticIndex<T> index, boolean showEmpty) {
473        logln("Actual");
474        StringBuilder UI = new StringBuilder();
475        for (Bucket<T> bucket : index) {
476            if (showEmpty || bucket.size() != 0) {
477                showLabelInList(UI, bucket.getLabel());
478                for (Record<T> item : bucket) {
479                    showIndexedItem(UI, item.getName(), item.getData());
480                }
481                logln(UI.toString());
482            }
483        }
484    }
485
486    /**
487     * @param myBucketLabels
488     * @param myBucketContents
489     * @param b
490     */
491    private void showIndex(List<String> myBucketLabels, ArrayList<Set<R4<RawCollationKey, String, Integer, Double>>> myBucketContents, boolean showEmpty) {
492        logln("Alternative");
493        StringBuilder UI = new StringBuilder();
494
495        for (int i = 0; i < myBucketLabels.size(); ++i) {
496            Set<R4<RawCollationKey, String, Integer, Double>> bucket = myBucketContents.get(i);
497            if (!showEmpty && bucket.size() == 0) {
498                continue;
499            }
500            UI.setLength(0);
501            UI.append("*").append(myBucketLabels.get(i));
502            for (R4<RawCollationKey, String, Integer, Double> item : bucket) {
503                UI.append("\t ").append(item.get1().toString()).append(ARROW).append(item.get3().toString());
504            }
505            logln(UI.toString());
506        }
507    }
508
509    private void showLabelAtTop(StringBuilder buffer, String label) {
510        buffer.append(label + " ");
511    }
512
513    private <T> void showIndexedItem(StringBuilder buffer, CharSequence key, T value) {
514        buffer.append("\t " + key + ARROW + value);
515    }
516
517    private void showLabelInList(StringBuilder buffer, String label) {
518        buffer.setLength(0);
519        buffer.append(label);
520    }
521
522    private Counter<String> getKeys(AlphabeticIndex.Bucket<Integer> entry) {
523        Counter<String> keys = new Counter<String>();
524        for (AlphabeticIndex.Record x : entry) {
525            String key = x.getName().toString();
526            keys.add(key, 1);
527        }
528        return keys;
529    }
530
531    @Test
532    public void TestIndexCharactersList() {
533        for (String[] localeAndIndexCharacters : localeAndIndexCharactersLists) {
534            ULocale locale = new ULocale(localeAndIndexCharacters[0]);
535            String expectedIndexCharacters = "\u2026:" + localeAndIndexCharacters[1] + ":\u2026";
536            Collection<String> alphabeticIndex = new AlphabeticIndex(locale).getBucketLabels();
537
538            // Join the elements of the list to a string with delimiter ":"
539            StringBuilder sb = new StringBuilder();
540            Iterator<String> iter = alphabeticIndex.iterator();
541            while (iter.hasNext()) {
542                sb.append(iter.next());
543                if (!iter.hasNext()) {
544                    break;
545                }
546                sb.append(":");
547            }
548            String actualIndexCharacters = sb.toString();
549            if (!expectedIndexCharacters.equals(actualIndexCharacters)) {
550                errln("Test failed for locale " + localeAndIndexCharacters[0] +
551                        "\n  Expected = |" + expectedIndexCharacters + "|\n  actual   = |" + actualIndexCharacters + "|");
552            }
553        }
554    }
555
556    @Test
557    public void TestBasics() {
558        ULocale[] list = ULocale.getAvailableLocales();
559        // get keywords combinations
560        // don't bother with multiple combinations at this point
561        List keywords = new ArrayList();
562        keywords.add("");
563
564        String[] collationValues = Collator.getKeywordValues("collation");
565        for (int j = 0; j < collationValues.length; ++j) {
566            keywords.add("@collation=" + collationValues[j]);
567        }
568
569        for (int i = 0; i < list.length; ++i) {
570            for (Iterator it = keywords.iterator(); it.hasNext();) {
571                String collationValue = (String) it.next();
572                String localeString = list[i].toString();
573                if (!KEY_LOCALES.contains(localeString)) continue; // TODO change in exhaustive
574                ULocale locale = new ULocale(localeString + collationValue);
575                if (collationValue.length() > 0 && !Collator.getFunctionalEquivalent("collation", locale).equals(locale)) {
576                    //logln("Skipping " + locale);
577                    continue;
578                }
579
580                if (locale.getCountry().length() != 0) {
581                    continue;
582                }
583                boolean isUnihan = collationValue.contains("unihan");
584                AlphabeticIndex alphabeticIndex = new AlphabeticIndex(locale);
585                if (isUnihan) {
586                    // Unihan tailorings have a label per radical, and there are at least 214,
587                    // if not more when simplified radicals are distinguished.
588                    alphabeticIndex.setMaxLabelCount(500);
589                }
590                final Collection mainChars = alphabeticIndex.getBucketLabels();
591                String mainCharString = mainChars.toString();
592                if (mainCharString.length() > 500) {
593                    mainCharString = mainCharString.substring(0,500) + "...";
594                }
595                logln(mainChars.size() + "\t" + locale + "\t" + locale.getDisplayName(ULocale.ENGLISH));
596                logln("Index:\t" + mainCharString);
597                if (!isUnihan && mainChars.size() > 100) {
598                    errln("Index character set too large: " +
599                            locale + " [" + mainChars.size() + "]:\n    " + mainChars);
600                }
601            }
602        }
603    }
604
605    @Test
606    public void TestClientSupport() {
607        for (String localeString : new String[] {"zh"}) { // KEY_LOCALES, new String[] {"zh"}
608            ULocale ulocale = new ULocale(localeString);
609            AlphabeticIndex<Double> alphabeticIndex = new AlphabeticIndex<Double>(ulocale).addLabels(Locale.ENGLISH);
610            RuleBasedCollator collator = alphabeticIndex.getCollator();
611            String [][] tests;
612
613            if (!localeString.equals("zh") ) {
614                tests = new String[][] {SimpleTests};
615            } else {
616                tests = new String[][] {SimpleTests, hackPinyin, simplifiedNames};
617            }
618
619            for (String [] shortTest : tests) {
620                double testValue = 100;
621                alphabeticIndex.clearRecords();
622                for (String name : shortTest) {
623                    alphabeticIndex.addRecord(name, testValue++);
624                }
625
626                if (DEBUG) showIndex(alphabeticIndex, false);
627
628                // make my own copy
629                testValue = 100;
630                List<String> myBucketLabels = alphabeticIndex.getBucketLabels();
631                ArrayList<Set<R4<RawCollationKey, String, Integer, Double>>> myBucketContents = new ArrayList<Set<R4<RawCollationKey, String, Integer, Double>>>(myBucketLabels.size());
632                for (int i = 0; i < myBucketLabels.size(); ++i) {
633                    myBucketContents.add(new TreeSet<R4<RawCollationKey, String, Integer, Double>>());
634                }
635                for (String name : shortTest) {
636                    int bucketIndex = alphabeticIndex.getBucketIndex(name);
637                    if (bucketIndex > myBucketContents.size()) {
638                        alphabeticIndex.getBucketIndex(name); // call again for debugging
639                    }
640                    Set<R4<RawCollationKey, String, Integer, Double>> myBucket = myBucketContents.get(bucketIndex);
641                    RawCollationKey rawCollationKey = collator.getRawCollationKey(name, null);
642                    R4<RawCollationKey, String, Integer, Double> row = Row.of(rawCollationKey, name, name.length(), testValue++);
643                    myBucket.add(row);
644                }
645                if (DEBUG) showIndex(myBucketLabels, myBucketContents, false);
646
647                // now compare
648                int index = 0;
649                boolean gotError = false;
650                for (AlphabeticIndex.Bucket<Double> bucket : alphabeticIndex) {
651                    String bucketLabel = bucket.getLabel();
652                    String myLabel = myBucketLabels.get(index);
653                    if (!bucketLabel.equals(myLabel)) {
654                        gotError |= !assertEquals(ulocale + "\tBucket Labels (" + index + ")", bucketLabel, myLabel);
655                    }
656                    Set<R4<RawCollationKey, String, Integer, Double>> myBucket = myBucketContents.get(index);
657                    Iterator<R4<RawCollationKey, String, Integer, Double>> myBucketIterator = myBucket.iterator();
658                    int recordIndex = 0;
659                    for (Record<Double> record : bucket) {
660                        String myName = null;
661                        if (myBucketIterator.hasNext()) {
662                            R4<RawCollationKey, String, Integer, Double> myRecord = myBucketIterator.next();
663                            myName = myRecord.get1();
664                        }
665                        if (!record.getName().equals(myName)) {
666                            gotError |= !assertEquals(ulocale + "\t" + bucketLabel + "\t" + "Record Names (" + index + "." + recordIndex++ + ")", record.getName(), myName);
667                        }
668                    }
669                    while (myBucketIterator.hasNext()) {
670                        R4<RawCollationKey, String, Integer, Double> myRecord = myBucketIterator.next();
671                        String myName = myRecord.get1();
672                        gotError |= !assertEquals(ulocale + "\t" + bucketLabel + "\t" + "Record Names (" + index + "." + recordIndex++ + ")", null, myName);
673                    }
674                    index++;
675                }
676                if (gotError) {
677                    showIndex(myBucketLabels, myBucketContents, false);
678                    showIndex(alphabeticIndex, false);
679                }
680            }
681        }
682    }
683
684    @Test
685    public void TestFirstScriptCharacters() {
686        Collection<String> firstCharacters =
687                new AlphabeticIndex(ULocale.ENGLISH).getFirstCharactersInScripts();
688        Collection<String> expectedFirstCharacters = firstStringsInScript((RuleBasedCollator) Collator.getInstance(ULocale.ROOT));
689        Collection<String> diff = new TreeSet<String>(firstCharacters);
690        diff.removeAll(expectedFirstCharacters);
691        assertTrue("First Characters contains unexpected ones: " + diff, diff.isEmpty());
692        diff.clear();
693        diff.addAll(expectedFirstCharacters);
694        diff.removeAll(firstCharacters);
695        assertTrue("First Characters missing expected ones: " + diff, diff.isEmpty());
696    }
697
    // Candidate code points iterated by firstStringsInScript(): everything that is not
    // [:nfcqc=no:], minus the Common, Inherited and Unknown scripts. The set is frozen.
    private static final UnicodeSet TO_TRY = new UnicodeSet("[[:^nfcqc=no:]-[:sc=Common:]-[:sc=Inherited:]-[:sc=Unknown:]]").freeze();
699
700    /**
701     * Returns a collection of all the "First" characters of scripts, according to the collation.
702     */
703    private static Collection<String> firstStringsInScript(RuleBasedCollator ruleBasedCollator) {
704        String[] results = new String[UScript.CODE_LIMIT];
705        for (String current : TO_TRY) {
706            if (ruleBasedCollator.compare(current, "a") < 0) { // we only want "real" script characters, not symbols.
707                continue;
708            }
709            int script = UScript.getScript(current.codePointAt(0));
710            if (results[script] == null) {
711                results[script] = current;
712            } else if (ruleBasedCollator.compare(current, results[script]) < 0) {
713                results[script] = current;
714            }
715        }
716
717        try {
718            UnicodeSet extras = new UnicodeSet();
719            UnicodeSet expansions = new UnicodeSet();
720            ruleBasedCollator.getContractionsAndExpansions(extras, expansions, true);
721            extras.addAll(expansions).removeAll(TO_TRY);
722            if (extras.size() != 0) {
723                Normalizer2 normalizer = Normalizer2.getNFKCInstance();
724                for (String current : extras) {
725                    if (!normalizer.isNormalized(current) || ruleBasedCollator.compare(current, "9") <= 0) {
726                        continue;
727                    }
728                    int script = getFirstRealScript(current);
729                    if (script == UScript.UNKNOWN && !isUnassignedBoundary(current)) { continue; }
730                    if (results[script] == null) {
731                        results[script] = current;
732                    } else if (ruleBasedCollator.compare(current, results[script]) < 0) {
733                        results[script] = current;
734                    }
735                }
736            }
737        } catch (Exception e) {
738        } // why have a checked exception???
739
740        // TODO: We should not test that we get the same strings, but that we
741        // get strings that sort primary-equal to those from the implementation.
742
743        Collection<String> result = new ArrayList<String>();
744        for (int i = 0; i < results.length; ++i) {
745            if (results[i] != null) {
746                result.add(results[i]);
747            }
748        }
749        return result;
750    }
751
752    private static final boolean isUnassignedBoundary(CharSequence s) {
753        // The root collator provides a script-first-primary boundary contraction
754        // for the unassigned-implicit range.
755        return s.charAt(0) == 0xfdd1 &&
756                UScript.getScript(Character.codePointAt(s, 1)) == UScript.UNKNOWN;
757    }
758
    /**
     * Placeholder test with an empty body; it executes nothing.
     * The commented-out lines are scratch code that adds Cyrillic labels to an English
     * index and prints the label and bucket counts.
     */
    @Test
    public void TestZZZ() {
        //            int x = 3;
        //            AlphabeticIndex index = new AlphabeticIndex(ULocale.ENGLISH);
        //            UnicodeSet additions = new UnicodeSet();
        //            additions.add(0x410).add(0x415);  // Cyrillic
        //            // additions.add(0x391).add(0x393);     // Greek
        //            index.addLabels(additions);
        //            int lc = index.getLabels().size();
        //            List  labels = index.getLabels();
        //            System.out.println("Label Count = " + lc + "\t" + labels);
        //            System.out.println("Bucket Count =" + index.getBucketCount());
    }
772
    /**
     * Runs checkBuckets() for the "zh" locale over simplifiedNames with ULocale.ENGLISH.
     * NOTE(review): the meaning of the two trailing string arguments is defined by
     * checkBuckets(), which is declared outside this section - confirm there.
     */
    @Test
    public void TestSimplified() {
        checkBuckets("zh", simplifiedNames, ULocale.ENGLISH, "W", "\u897f");
    }
777
    /**
     * Runs checkBuckets() for the "zh_Hant" locale over traditionalNames with ULocale.ENGLISH.
     * NOTE(review): the meaning of the two trailing string arguments is defined by
     * checkBuckets(), which is declared outside this section - confirm there.
     */
    @Test
    public void TestTraditional() {
        checkBuckets("zh_Hant", traditionalNames, ULocale.ENGLISH, "\u4e9f", "\u5357\u9580");
    }
782
    /**
     * Mixed sample names used by TestClientSupport: symbols and digits, Latin names
     * (including dotted and dotless I variants), Greek names and Han names.
     * Some entries appear more than once. The commented-out rows are not part of the data.
     */
    static final String[] SimpleTests = {
        "斎藤",
        "\u1f2d\u03c1\u03b1",
        "$", "\u00a3", "12", "2",
        "Davis", "Davis", "Abbot", "\u1D05avis", "Zach", "\u1D05avis", "\u01b5", "\u0130stanbul", "Istanbul", "istanbul", "\u0131stanbul",
        "\u00deor", "\u00c5berg", "\u00d6stlund",
        "\u1f2d\u03c1\u03b1", "\u1f08\u03b8\u03b7\u03bd\u1fb6",
        "\u0396\u03b5\u03cd\u03c2", "\u03a0\u03bf\u03c3\u03b5\u03b9\u03b4\u1f63\u03bd", "\u1f0d\u03b9\u03b4\u03b7\u03c2", "\u0394\u03b7\u03bc\u03ae\u03c4\u03b7\u03c1", "\u1f19\u03c3\u03c4\u03b9\u03ac",
        //"\u1f08\u03c0\u03cc\u03bb\u03bb\u03c9\u03bd", "\u1f0c\u03c1\u03c4\u03b5\u03bc\u03b9\u03c2", "\u1f19\u03c1\u03bc\u1f23\u03c2", "\u1f0c\u03c1\u03b7\u03c2", "\u1f08\u03c6\u03c1\u03bf\u03b4\u03af\u03c4\u03b7", "\u1f2d\u03c6\u03b1\u03b9\u03c3\u03c4\u03bf\u03c2", "\u0394\u03b9\u03cc\u03bd\u03c5\u03c3\u03bf\u03c2",
        "\u6589\u85e4", "\u4f50\u85e4", "\u9234\u6728", "\u9ad8\u6a4b", "\u7530\u4e2d", "\u6e21\u8fba", "\u4f0a\u85e4", "\u5c71\u672c", "\u4e2d\u6751", "\u5c0f\u6797", "\u658e\u85e4", "\u52a0\u85e4",
        //"\u5409\u7530", "\u5c71\u7530", "\u4f50\u3005\u6728", "\u5c71\u53e3", "\u677e\u672c", "\u4e95\u4e0a", "\u6728\u6751", "\u6797", "\u6e05\u6c34"
    };
795
    /**
     * Sample data used by TestClientSupport for the "zh" locale. Each row is a lowercase
     * Latin letter followed by three Han characters; there are no rows for i, u and v.
     * NOTE(review): presumably the Han characters of a row have pinyin readings that start
     * with the row's letter - confirm against the collation data.
     */
    static final String[] hackPinyin = {
        "a", "\u5416", "\u58ba", //
        "b", "\u516b", "\u62d4", "\u8500", //
        "c", "\u5693", "\u7938", "\u9e7e", //
        "d", "\u5491", "\u8fcf", "\u964a", //
        "e","\u59b8", "\u92e8", "\u834b", //
        "f", "\u53d1", "\u9197", "\u99a5", //
        "g", "\u7324", "\u91d3", "\u8142", //
        "h", "\u598e", "\u927f", "\u593b", //
        "j", "\u4e0c", "\u6785", "\u9d58", //
        "k", "\u5494", "\u958b", "\u7a52", //
        "l", "\u5783", "\u62c9", "\u9ba5", //
        "m", "\u5638", "\u9ebb", "\u65c0", //
        "n", "\u62ff", "\u80ad", "\u685b", //
        "o", "\u5662", "\u6bee", "\u8bb4", //
        "p", "\u5991", "\u8019", "\u8c31", //
        "q", "\u4e03", "\u6053", "\u7f56", //
        "r", "\u5465", "\u72aa", "\u6e03", //
        "s", "\u4ee8", "\u9491", "\u93c1", //
        "t", "\u4ed6", "\u9248", "\u67dd", //
        "w", "\u5c72", "\u5558", "\u5a7a", //
        "x", "\u5915", "\u5438", "\u6bbe", //
        "y", "\u4e2b", "\u82bd", "\u8574", //
        "z", "\u5e00", "\u707d", "\u5c0a"
    };
821
    /**
     * Name list used by TestSimplified (locale "zh") and TestClientSupport: four Latin names
     * followed by Han names of one or two characters. Some entries appear more than once.
     */
    static final String[] simplifiedNames = {
        "Abbot", "Morton", "Zachary", "Williams", "\u8d75", "\u94b1", "\u5b59", "\u674e", "\u5468", "\u5434", "\u90d1", "\u738b", "\u51af", "\u9648", "\u696e", "\u536b", "\u848b", "\u6c88",
        "\u97e9", "\u6768", "\u6731", "\u79e6", "\u5c24", "\u8bb8", "\u4f55", "\u5415", "\u65bd", "\u5f20", "\u5b54", "\u66f9", "\u4e25", "\u534e", "\u91d1", "\u9b4f", "\u9676", "\u59dc", "\u621a", "\u8c22", "\u90b9",
        "\u55bb", "\u67cf", "\u6c34", "\u7aa6", "\u7ae0", "\u4e91", "\u82cf", "\u6f58", "\u845b", "\u595a", "\u8303", "\u5f6d", "\u90ce", "\u9c81", "\u97e6", "\u660c", "\u9a6c", "\u82d7", "\u51e4", "\u82b1", "\u65b9",
        "\u4fde", "\u4efb", "\u8881", "\u67f3", "\u9146", "\u9c8d", "\u53f2", "\u5510", "\u8d39", "\u5ec9", "\u5c91", "\u859b", "\u96f7", "\u8d3a", "\u502a", "\u6c64", "\u6ed5", "\u6bb7", "\u7f57", "\u6bd5", "\u90dd",
        "\u90ac", "\u5b89", "\u5e38", "\u4e50", "\u4e8e", "\u65f6", "\u5085", "\u76ae", "\u535e", "\u9f50", "\u5eb7", "\u4f0d", "\u4f59", "\u5143", "\u535c", "\u987e", "\u5b5f", "\u5e73", "\u9ec4", "\u548c", "\u7a46",
        "\u8427", "\u5c39", "\u59da", "\u90b5", "\u6e5b", "\u6c6a", "\u7941", "\u6bdb", "\u79b9", "\u72c4", "\u7c73", "\u8d1d", "\u660e", "\u81e7", "\u8ba1", "\u4f0f", "\u6210", "\u6234", "\u8c08", "\u5b8b", "\u8305",
        "\u5e9e", "\u718a", "\u7eaa", "\u8212", "\u5c48", "\u9879", "\u795d", "\u8463", "\u6881", "\u675c", "\u962e", "\u84dd", "\u95fd", "\u5e2d", "\u5b63", "\u9ebb", "\u5f3a", "\u8d3e", "\u8def", "\u5a04", "\u5371",
        "\u6c5f", "\u7ae5", "\u989c", "\u90ed", "\u6885", "\u76db", "\u6797", "\u5201", "\u953a", "\u5f90", "\u4e18", "\u9a86", "\u9ad8", "\u590f", "\u8521", "\u7530", "\u6a0a", "\u80e1", "\u51cc", "\u970d", "\u865e",
        "\u4e07", "\u652f", "\u67ef", "\u661d", "\u7ba1", "\u5362", "\u83ab", "\u7ecf", "\u623f", "\u88d8", "\u7f2a", "\u5e72", "\u89e3", "\u5e94", "\u5b97", "\u4e01", "\u5ba3", "\u8d32", "\u9093", "\u90c1", "\u5355",
        "\u676d", "\u6d2a", "\u5305", "\u8bf8", "\u5de6", "\u77f3", "\u5d14", "\u5409", "\u94ae", "\u9f9a", "\u7a0b", "\u5d47", "\u90a2", "\u6ed1", "\u88f4", "\u9646", "\u8363", "\u7fc1", "\u8340", "\u7f8a", "\u65bc",
        "\u60e0", "\u7504", "\u9eb9", "\u5bb6", "\u5c01", "\u82ae", "\u7fbf", "\u50a8", "\u9773", "\u6c72", "\u90b4", "\u7cdc", "\u677e", "\u4e95", "\u6bb5", "\u5bcc", "\u5deb", "\u4e4c", "\u7126", "\u5df4", "\u5f13",
        "\u7267", "\u9697", "\u5c71", "\u8c37", "\u8f66", "\u4faf", "\u5b93", "\u84ec", "\u5168", "\u90d7", "\u73ed", "\u4ef0", "\u79cb", "\u4ef2", "\u4f0a", "\u5bab", "\u5b81", "\u4ec7", "\u683e", "\u66b4", "\u7518",
        "\u659c", "\u5389", "\u620e", "\u7956", "\u6b66", "\u7b26", "\u5218", "\u666f", "\u8a79", "\u675f", "\u9f99", "\u53f6", "\u5e78", "\u53f8", "\u97f6", "\u90dc", "\u9ece", "\u84df", "\u8584", "\u5370", "\u5bbf",
        "\u767d", "\u6000", "\u84b2", "\u90b0", "\u4ece", "\u9102", "\u7d22", "\u54b8", "\u7c4d", "\u8d56", "\u5353", "\u853a", "\u5c60", "\u8499", "\u6c60", "\u4e54", "\u9634", "\u90c1", "\u80e5", "\u80fd", "\u82cd",
        "\u53cc", "\u95fb", "\u8398", "\u515a", "\u7fdf", "\u8c2d", "\u8d21", "\u52b3", "\u9004", "\u59ec", "\u7533", "\u6276", "\u5835", "\u5189", "\u5bb0", "\u90e6", "\u96cd", "\u90e4", "\u74a9", "\u6851", "\u6842",
        "\u6fee", "\u725b", "\u5bff", "\u901a", "\u8fb9", "\u6248", "\u71d5", "\u5180", "\u90cf", "\u6d66", "\u5c1a", "\u519c", "\u6e29", "\u522b", "\u5e84", "\u664f", "\u67f4", "\u77bf", "\u960e", "\u5145", "\u6155",
        "\u8fde", "\u8339", "\u4e60", "\u5ba6", "\u827e", "\u9c7c", "\u5bb9", "\u5411", "\u53e4", "\u6613", "\u614e", "\u6208", "\u5ed6", "\u5ebe", "\u7ec8", "\u66a8", "\u5c45", "\u8861", "\u6b65", "\u90fd", "\u803f",
        "\u6ee1", "\u5f18", "\u5321", "\u56fd", "\u6587", "\u5bc7", "\u5e7f", "\u7984", "\u9619", "\u4e1c", "\u6b27", "\u6bb3", "\u6c83", "\u5229", "\u851a", "\u8d8a", "\u5914", "\u9686", "\u5e08", "\u5de9", "\u538d",
        "\u8042", "\u6641", "\u52fe", "\u6556", "\u878d", "\u51b7", "\u8a3e", "\u8f9b", "\u961a", "\u90a3", "\u7b80", "\u9976", "\u7a7a", "\u66fe", "\u6bcb", "\u6c99", "\u4e5c", "\u517b", "\u97a0", "\u987b", "\u4e30",
        "\u5de2", "\u5173", "\u84af", "\u76f8", "\u67e5", "\u540e", "\u8346", "\u7ea2", "\u6e38", "\u7afa", "\u6743", "\u9011", "\u76d6", "\u76ca", "\u6853", "\u516c", "\u4e07\u4fdf", "\u53f8\u9a6c", "\u4e0a\u5b98", "\u6b27\u9633",
        "\u590f\u4faf", "\u8bf8\u845b", "\u95fb\u4eba", "\u4e1c\u65b9", "\u8d6b\u8fde", "\u7687\u752b", "\u5c09\u8fdf", "\u516c\u7f8a", "\u6fb9\u53f0", "\u516c\u51b6", "\u5b97\u653f", "\u6fee\u9633", "\u6df3\u4e8e", "\u5355\u4e8e", "\u592a\u53d4", "\u7533\u5c60", "\u516c\u5b59", "\u4ef2\u5b59",
        "\u8f69\u8f95", "\u4ee4\u72d0", "\u953a\u79bb", "\u5b87\u6587", "\u957f\u5b59", "\u6155\u5bb9", "\u9c9c\u4e8e", "\u95fe\u4e18", "\u53f8\u5f92", "\u53f8\u7a7a", "\u4e0c\u5b98", "\u53f8\u5bc7", "\u4ec9", "\u7763", "\u5b50\u8f66", "\u989b\u5b59", "\u7aef\u6728", "\u5deb\u9a6c",
        "\u516c\u897f", "\u6f06\u96d5", "\u4e50\u6b63", "\u58e4\u9a77", "\u516c\u826f", "\u62d3\u62d4", "\u5939\u8c37", "\u5bb0\u7236", "\u8c37\u6881", "\u664b", "\u695a", "\u960e", "\u6cd5", "\u6c5d", "\u9122", "\u6d82", "\u94a6", "\u6bb5\u5e72", "\u767e\u91cc",
        "\u4e1c\u90ed", "\u5357\u95e8", "\u547c\u5ef6", "\u5f52", "\u6d77", "\u7f8a\u820c", "\u5fae\u751f", "\u5cb3", "\u5e05", "\u7f11", "\u4ea2", "\u51b5", "\u540e", "\u6709", "\u7434", "\u6881\u4e18", "\u5de6\u4e18", "\u4e1c\u95e8", "\u897f\u95e8",
        "\u5546", "\u725f", "\u4f58", "\u4f74", "\u4f2f", "\u8d4f", "\u5357\u5bab", "\u58a8", "\u54c8", "\u8c2f", "\u7b2a", "\u5e74", "\u7231", "\u9633", "\u4f5f"
    };
849
    /**
     * Name list used by TestTraditional (locale "zh_Hant"): one Han character, four Latin
     * names, Han names of one or two characters, and at the end single Han characters that
     * include supplementary-plane code points written as surrogate pairs.
     */
    static final String[] traditionalNames = { "丁", "Abbot", "Morton", "Zachary", "Williams", "\u8d99", "\u9322", "\u5b6b",
            "\u674e", "\u5468", "\u5433", "\u912d", "\u738b", "\u99ae", "\u9673", "\u696e", "\u885b", "\u8523",
            "\u6c88", "\u97d3", "\u694a", "\u6731", "\u79e6", "\u5c24", "\u8a31", "\u4f55", "\u5442", "\u65bd",
            "\u5f35", "\u5b54", "\u66f9", "\u56b4", "\u83ef", "\u91d1", "\u9b4f", "\u9676", "\u59dc", "\u621a",
            "\u8b1d", "\u9112", "\u55bb", "\u67cf", "\u6c34", "\u7ac7", "\u7ae0", "\u96f2", "\u8607", "\u6f58",
            "\u845b", "\u595a", "\u7bc4", "\u5f6d", "\u90ce", "\u9b6f", "\u97cb", "\u660c", "\u99ac", "\u82d7",
            "\u9cf3", "\u82b1", "\u65b9", "\u4fde", "\u4efb", "\u8881", "\u67f3", "\u9146", "\u9b91", "\u53f2",
            "\u5510", "\u8cbb", "\u5ec9", "\u5c91", "\u859b", "\u96f7", "\u8cc0", "\u502a", "\u6e6f", "\u6ed5",
            "\u6bb7", "\u7f85", "\u7562", "\u90dd", "\u9114", "\u5b89", "\u5e38", "\u6a02", "\u65bc", "\u6642",
            "\u5085", "\u76ae", "\u535e", "\u9f4a", "\u5eb7", "\u4f0d", "\u9918", "\u5143", "\u535c", "\u9867",
            "\u5b5f", "\u5e73", "\u9ec3", "\u548c", "\u7a46", "\u856d", "\u5c39", "\u59da", "\u90b5", "\u6e5b",
            "\u6c6a", "\u7941", "\u6bdb", "\u79b9", "\u72c4", "\u7c73", "\u8c9d", "\u660e", "\u81e7", "\u8a08",
            "\u4f0f", "\u6210", "\u6234", "\u8ac7", "\u5b8b", "\u8305", "\u9f90", "\u718a", "\u7d00", "\u8212",
            "\u5c48", "\u9805", "\u795d", "\u8463", "\u6881", "\u675c", "\u962e", "\u85cd", "\u95a9", "\u5e2d",
            "\u5b63", "\u9ebb", "\u5f37", "\u8cc8", "\u8def", "\u5a41", "\u5371", "\u6c5f", "\u7ae5", "\u984f",
            "\u90ed", "\u6885", "\u76db", "\u6797", "\u5201", "\u937e", "\u5f90", "\u4e18", "\u99f1", "\u9ad8",
            "\u590f", "\u8521", "\u7530", "\u6a0a", "\u80e1", "\u51cc", "\u970d", "\u865e", "\u842c", "\u652f",
            "\u67ef", "\u661d", "\u7ba1", "\u76e7", "\u83ab", "\u7d93", "\u623f", "\u88d8", "\u7e46", "\u5e79",
            "\u89e3", "\u61c9", "\u5b97", "\u4e01", "\u5ba3", "\u8cc1", "\u9127", "\u9b31", "\u55ae", "\u676d",
            "\u6d2a", "\u5305", "\u8af8", "\u5de6", "\u77f3", "\u5d14", "\u5409", "\u9215", "\u9f94", "\u7a0b",
            "\u5d47", "\u90a2", "\u6ed1", "\u88f4", "\u9678", "\u69ae", "\u7fc1", "\u8340", "\u7f8a", "\u65bc",
            "\u60e0", "\u7504", "\u9eb4", "\u5bb6", "\u5c01", "\u82ae", "\u7fbf", "\u5132", "\u9773", "\u6c72",
            "\u90b4", "\u7cdc", "\u677e", "\u4e95", "\u6bb5", "\u5bcc", "\u5deb", "\u70cf", "\u7126", "\u5df4",
            "\u5f13", "\u7267", "\u9697", "\u5c71", "\u8c37", "\u8eca", "\u4faf", "\u5b93", "\u84ec", "\u5168",
            "\u90d7", "\u73ed", "\u4ef0", "\u79cb", "\u4ef2", "\u4f0a", "\u5bae", "\u5be7", "\u4ec7", "\u6b12",
            "\u66b4", "\u7518", "\u659c", "\u53b2", "\u620e", "\u7956", "\u6b66", "\u7b26", "\u5289", "\u666f",
            "\u8a79", "\u675f", "\u9f8d", "\u8449", "\u5e78", "\u53f8", "\u97f6", "\u90dc", "\u9ece", "\u858a",
            "\u8584", "\u5370", "\u5bbf", "\u767d", "\u61f7", "\u84b2", "\u90b0", "\u5f9e", "\u9102", "\u7d22",
            "\u54b8", "\u7c4d", "\u8cf4", "\u5353", "\u85fa", "\u5c60", "\u8499", "\u6c60", "\u55ac", "\u9670",
            "\u9b31", "\u80e5", "\u80fd", "\u84bc", "\u96d9", "\u805e", "\u8398", "\u9ee8", "\u7fdf", "\u8b5a",
            "\u8ca2", "\u52de", "\u9004", "\u59ec", "\u7533", "\u6276", "\u5835", "\u5189", "\u5bb0", "\u9148",
            "\u96cd", "\u90e4", "\u74a9", "\u6851", "\u6842", "\u6fee", "\u725b", "\u58fd", "\u901a", "\u908a",
            "\u6248", "\u71d5", "\u5180", "\u90df", "\u6d66", "\u5c1a", "\u8fb2", "\u6eab", "\u5225", "\u838a",
            "\u664f", "\u67f4", "\u77bf", "\u95bb", "\u5145", "\u6155", "\u9023", "\u8339", "\u7fd2", "\u5ba6",
            "\u827e", "\u9b5a", "\u5bb9", "\u5411", "\u53e4", "\u6613", "\u614e", "\u6208", "\u5ed6", "\u5ebe",
            "\u7d42", "\u66a8", "\u5c45", "\u8861", "\u6b65", "\u90fd", "\u803f", "\u6eff", "\u5f18", "\u5321",
            "\u570b", "\u6587", "\u5bc7", "\u5ee3", "\u797f", "\u95d5", "\u6771", "\u6b50", "\u6bb3", "\u6c83",
            "\u5229", "\u851a", "\u8d8a", "\u5914", "\u9686", "\u5e2b", "\u978f", "\u5399", "\u8076", "\u6641",
            "\u52fe", "\u6556", "\u878d", "\u51b7", "\u8a3e", "\u8f9b", "\u95de", "\u90a3", "\u7c21", "\u9952",
            "\u7a7a", "\u66fe", "\u6bcb", "\u6c99", "\u4e5c", "\u990a", "\u97a0", "\u9808", "\u8c50", "\u5de2",
            "\u95dc", "\u84af", "\u76f8", "\u67e5", "\u5f8c", "\u834a", "\u7d05", "\u904a", "\u7afa", "\u6b0a",
            "\u9011", "\u84cb", "\u76ca", "\u6853", "\u516c", "\u4e07\u4fdf", "\u53f8\u99ac", "\u4e0a\u5b98",
            "\u6b50\u967d", "\u590f\u4faf", "\u8af8\u845b", "\u805e\u4eba", "\u6771\u65b9", "\u8d6b\u9023",
            "\u7687\u752b", "\u5c09\u9072", "\u516c\u7f8a", "\u6fb9\u53f0", "\u516c\u51b6", "\u5b97\u653f",
            "\u6fee\u967d", "\u6df3\u4e8e", "\u55ae\u4e8e", "\u592a\u53d4", "\u7533\u5c60", "\u516c\u5b6b",
            "\u4ef2\u5b6b", "\u8ed2\u8f45", "\u4ee4\u72d0", "\u937e\u96e2", "\u5b87\u6587", "\u9577\u5b6b",
            "\u6155\u5bb9", "\u9bae\u4e8e", "\u95ad\u4e18", "\u53f8\u5f92", "\u53f8\u7a7a", "\u4e0c\u5b98",
            "\u53f8\u5bc7", "\u4ec9", "\u7763", "\u5b50\u8eca", "\u9853\u5b6b", "\u7aef\u6728", "\u5deb\u99ac",
            "\u516c\u897f", "\u6f06\u96d5", "\u6a02\u6b63", "\u58e4\u99df", "\u516c\u826f", "\u62d3\u62d4",
            "\u593e\u8c37", "\u5bb0\u7236", "\u7a40\u6881", "\u6649", "\u695a", "\u95bb", "\u6cd5", "\u6c5d", "\u9122",
            "\u5857", "\u6b3d", "\u6bb5\u5e72", "\u767e\u91cc", "\u6771\u90ed", "\u5357\u9580", "\u547c\u5ef6",
            "\u6b78", "\u6d77", "\u7f8a\u820c", "\u5fae\u751f", "\u5cb3", "\u5e25", "\u7df1", "\u4ea2", "\u6cc1",
            "\u5f8c", "\u6709", "\u7434", "\u6881\u4e18", "\u5de6\u4e18", "\u6771\u9580", "\u897f\u9580", "\u5546",
            "\u725f", "\u4f58", "\u4f74", "\u4f2f", "\u8cde", "\u5357\u5bae", "\u58a8", "\u54c8", "\u8b59", "\u7b2a",
            "\u5e74", "\u611b", "\u967d", "\u4f5f", "\u3401", "\u3422", "\u3426", "\u3493", "\u34A5", "\u34A7",
            "\u34AA", "\u3536", "\u4A3B", "\u4E00", "\u4E01", "\u4E07", "\u4E0D", "\u4E17", "\u4E23", "\u4E26",
            "\u4E34", "\u4E82", "\u4EB8", "\u4EB9", "\u511F", "\u512D", "\u513D", "\u513E", "\u53B5", "\u56D4",
            "\u56D6", "\u7065", "\u7069", "\u706A", "\u7E9E", "\u9750", "\u9F49", "\u9F7E", "\u9F98", "\uD840\uDC35",
            "\uD840\uDC3D", "\uD840\uDC3E", "\uD840\uDC41", "\uD840\uDC46", "\uD840\uDC4C", "\uD840\uDC4E",
            "\uD840\uDC53", "\uD840\uDC55", "\uD840\uDC56", "\uD840\uDC5F", "\uD840\uDC60", "\uD840\uDC7A",
            "\uD840\uDC7B", "\uD840\uDCC8", "\uD840\uDD9E", "\uD840\uDD9F", "\uD840\uDDA0", "\uD840\uDDA1",
            "\uD841\uDD3B", "\uD842\uDCCA", "\uD842\uDCCB", "\uD842\uDD6C", "\uD842\uDE0B", "\uD842\uDE0C",
            "\uD842\uDED1", "\uD844\uDD9F", "\uD845\uDD19", "\uD845\uDD1A", "\uD846\uDD3B", "\uD84C\uDF5C",
            "\uD85A\uDDC4", "\uD85A\uDDC5", "\uD85C\uDD98", "\uD85E\uDCB1", "\uD861\uDC04", "\uD864\uDDD3",
            "\uD865\uDE63", "\uD869\uDCCA", "\uD86B\uDE9A", };
915
916    /**
917     * Test AlphabeticIndex vs. root with script reordering.
918     */
919    @Test
920    public void TestHaniFirst() {
921        RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
922        coll.setReorderCodes(UScript.HAN);
923        AlphabeticIndex index = new AlphabeticIndex(coll);
924        assertEquals("getBucketCount()", 1, index.getBucketCount());   // ... (underflow only)
925        index.addLabels(Locale.ENGLISH);
926        assertEquals("getBucketCount()", 28, index.getBucketCount());  // ... A-Z ...
927        int bucketIndex = index.getBucketIndex("\u897f");
928        assertEquals("getBucketIndex(U+897F)", 0, bucketIndex);  // underflow bucket
929        bucketIndex = index.getBucketIndex("i");
930        assertEquals("getBucketIndex(i)", 9, bucketIndex);
931        bucketIndex = index.getBucketIndex("\u03B1");
932        assertEquals("getBucketIndex(Greek alpha)", 27, bucketIndex);
933        // U+50005 is an unassigned code point which sorts at the end, independent of the Hani group.
934        bucketIndex = index.getBucketIndex(UTF16.valueOf(0x50005));
935        assertEquals("getBucketIndex(U+50005)", 27, bucketIndex);
936        bucketIndex = index.getBucketIndex("\uFFFF");
937        assertEquals("getBucketIndex(U+FFFF)", 27, bucketIndex);
938    }
939
940    /**
941     * Test AlphabeticIndex vs. Pinyin with script reordering.
942     */
943    @Test
944    public void TestPinyinFirst() {
945        RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(ULocale.CHINESE);
946        coll.setReorderCodes(UScript.HAN);
947        AlphabeticIndex index = new AlphabeticIndex(coll);
948        assertEquals("getBucketCount()", 28, index.getBucketCount());   // ... A-Z ...
949        index.addLabels(Locale.CHINESE);
950        assertEquals("getBucketCount()", 28, index.getBucketCount());  // ... A-Z ...
951        int bucketIndex = index.getBucketIndex("\u897f");
952        assertEquals("getBucketIndex(U+897F)", 'X' - 'A' + 1, bucketIndex);
953        bucketIndex = index.getBucketIndex("i");
954        assertEquals("getBucketIndex(i)", 9, bucketIndex);
955        bucketIndex = index.getBucketIndex("\u03B1");
956        assertEquals("getBucketIndex(Greek alpha)", 27, bucketIndex);
957        // U+50005 is an unassigned code point which sorts at the end, independent of the Hani group.
958        bucketIndex = index.getBucketIndex(UTF16.valueOf(0x50005));
959        assertEquals("getBucketIndex(U+50005)", 27, bucketIndex);
960        bucketIndex = index.getBucketIndex("\uFFFF");
961        assertEquals("getBucketIndex(U+FFFF)", 27, bucketIndex);
962    }
963
964    /**
965     * Test labels with multiple primary weights.
966     */
967    @Test
968    public void TestSchSt() {
969        AlphabeticIndex index = new AlphabeticIndex(ULocale.GERMAN);
970        index.addLabels(new UnicodeSet("[Æ{Sch*}{St*}]"));
971        // ... A Æ B-R S Sch St T-Z ...
972        ImmutableIndex immIndex = index.buildImmutableIndex();
973        assertEquals("getBucketCount()", 31, index.getBucketCount());
974        assertEquals("immutable getBucketCount()", 31, immIndex.getBucketCount());
975        String[][] testCases = new String[][] {
976            // name, bucket index, bucket label
977            { "Adelbert", "1", "A" },
978            { "Afrika", "1", "A" },
979            { "Æsculap", "2", "Æ" },
980            { "Aesthet", "2", "Æ" },
981            { "Berlin", "3", "B" },
982            { "Rilke", "19", "R" },
983            { "Sacher", "20", "S" },
984            { "Seiler", "20", "S" },
985            { "Sultan", "20", "S" },
986            { "Schiller", "21", "Sch" },
987            { "Steiff", "22", "St" },
988            { "Thomas", "23", "T" }
989        };
990        List<String> labels = index.getBucketLabels();
991        for (String[] testCase : testCases) {
992            String name = testCase[0];
993            int bucketIndex = Integer.valueOf(testCase[1]);
994            String label = testCase[2];
995            String msg = "getBucketIndex(" + name + ")";
996            assertEquals(msg, bucketIndex, index.getBucketIndex(name));
997            msg = "immutable " + msg;
998            assertEquals(msg, bucketIndex, immIndex.getBucketIndex(name));
999            msg = "bucket label (" + name + ")";
1000            assertEquals(msg, label, labels.get(index.getBucketIndex(name)));
1001            msg = "immutable " + msg;
1002            assertEquals(msg, label, immIndex.getBucket(bucketIndex).getLabel());
1003        }
1004    }
1005
1006    /**
1007     * With no real labels, there should be only the underflow label.
1008     */
1009    @Test
1010    public void TestNoLabels() {
1011        RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
1012        AlphabeticIndex<Integer> index = new AlphabeticIndex<Integer>(coll);
1013        index.addRecord("\u897f", 0);
1014        index.addRecord("i", 0);
1015        index.addRecord("\u03B1", 0);
1016        assertEquals("getRecordCount()", 3, index.getRecordCount());  // code coverage
1017        assertEquals("getBucketCount()", 1, index.getBucketCount());  // ...
1018        Bucket<Integer> bucket = index.iterator().next();
1019        assertEquals("underflow label type", LabelType.UNDERFLOW, bucket.getLabelType());
1020        assertEquals("all records in the underflow bucket", 3, bucket.size());
1021    }
1022
1023    /**
1024     * Test with the Bopomofo-phonetic tailoring.
1025     */
1026    @Test
1027    public void TestChineseZhuyin() {
1028        AlphabeticIndex index = new AlphabeticIndex(ULocale.forLanguageTag("zh-u-co-zhuyin"));
1029        ImmutableIndex immIndex = index.buildImmutableIndex();
1030        assertEquals("getBucketCount()", 38, immIndex.getBucketCount());  // ... ㄅ ㄆ ㄇ ㄈ ㄉ -- ㄩ ...
1031        assertEquals("label 1", "ㄅ", immIndex.getBucket(1).getLabel());
1032        assertEquals("label 2", "ㄆ", immIndex.getBucket(2).getLabel());
1033        assertEquals("label 3", "ㄇ", immIndex.getBucket(3).getLabel());
1034        assertEquals("label 4", "ㄈ", immIndex.getBucket(4).getLabel());
1035        assertEquals("label 5", "ㄉ", immIndex.getBucket(5).getLabel());
1036    }
1037
1038    @Test
1039    public void TestJapaneseKanji() {
1040        AlphabeticIndex index = new AlphabeticIndex(ULocale.JAPANESE);
1041        AlphabeticIndex.ImmutableIndex immIndex = index.buildImmutableIndex();
1042        // There are no index characters for Kanji in the Japanese standard collator.
1043        // They should all go into the overflow bucket.
1044        final int[] kanji = { 0x4E9C, 0x95C7, 0x4E00, 0x58F1 };
1045        int overflowIndex = immIndex.getBucketCount() - 1;
1046        for(int i = 0; i < kanji.length; ++i) {
1047            String msg = String.format("kanji[%d]=U+%04X in overflow bucket", i, kanji[i]);
1048            assertEquals(msg, overflowIndex, immIndex.getBucketIndex(UTF16.valueOf(kanji[i])));
1049        }
1050    }
1051
1052    @Test
1053    public void TestFrozenCollator() {
1054        // Ticket #9472
1055        RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(new ULocale("da"));
1056        coll.setStrength(Collator.IDENTICAL);
1057        coll.freeze();
1058        // The AlphabeticIndex constructor used to throw an exception
1059        // because it cloned the collator (which preserves frozenness)
1060        // and set the clone's strength to PRIMARY.
1061        AlphabeticIndex index = new AlphabeticIndex(coll);
1062        assertEquals("same strength as input Collator",
1063                Collator.IDENTICAL, index.getCollator().getStrength());
1064    }
1065
1066    @Test
1067    public void TestChineseUnihan() {
1068        AlphabeticIndex index = new AlphabeticIndex(new ULocale("zh-u-co-unihan"));
1069        index.setMaxLabelCount(500);  // ICU 54 default is 99.
1070        assertEquals("getMaxLabelCount()", 500, index.getMaxLabelCount());  // code coverage
1071        AlphabeticIndex.ImmutableIndex immIndex = index.buildImmutableIndex();
1072        int bucketCount = immIndex.getBucketCount();
1073        if(bucketCount < 216) {
1074            // There should be at least an underflow and overflow label,
1075            // and one for each of 214 radicals,
1076            // and maybe additional labels for simplified radicals.
1077            // (ICU4C: dataerrln(), prints only a warning if the data is missing)
1078            errln("too few buckets/labels for Chinese/unihan: " + bucketCount +
1079                    " (is zh/unihan data available?)");
1080            return;
1081        } else {
1082            logln("Chinese/unihan has " + bucketCount + " buckets/labels");
1083        }
1084        // bucketIndex = radical number, adjusted for simplified radicals in lower buckets.
1085        int bucketIndex = index.getBucketIndex("\u4e5d");
1086        assertEquals("getBucketIndex(U+4E5D)", 5, bucketIndex);
1087        // radical 100, and there is a 90' since Unicode 8
1088        bucketIndex = index.getBucketIndex("\u7527");
1089        assertEquals("getBucketIndex(U+7527)", 101, bucketIndex);
1090    }
1091
1092    @Test
1093    public void testAddLabels_Locale() {
1094        AlphabeticIndex<?> ulocaleIndex = new AlphabeticIndex<String>(ULocale.CANADA);
1095        AlphabeticIndex<?> localeIndex = new AlphabeticIndex<String>(Locale.CANADA);
1096        ulocaleIndex.addLabels(ULocale.SIMPLIFIED_CHINESE);
1097        localeIndex.addLabels(Locale.SIMPLIFIED_CHINESE);
1098        assertEquals("getBucketLables() results of ulocaleIndex and localeIndex differ",
1099                ulocaleIndex.getBucketLabels(), localeIndex.getBucketLabels());
1100    }
1101
1102    @Test
1103    public void testGetRecordCount_empty() {
1104        assertEquals("Record count of empty index not 0", 0,
1105                new AlphabeticIndex<String>(ULocale.CANADA).getRecordCount());
1106    }
1107
1108    @Test
1109    public void testGetRecordCount_withRecords() {
1110        assertEquals("Record count of index with one record not 1", 1,
1111                new AlphabeticIndex<String>(ULocale.CANADA).addRecord("foo", null).getRecordCount());
1112    }
1113
1114    /**
1115     * Check that setUnderflowLabel/setOverflowLabel/setInflowLabel correctly influence the name of
1116     * generated labels.
1117     */
1118    @Test
1119    public void testFlowLabels() {
1120        AlphabeticIndex<?> index = new AlphabeticIndex<String>(ULocale.ENGLISH)
1121                .addLabels(ULocale.forLanguageTag("ru"));
1122        index.setUnderflowLabel("underflow");
1123        index.setOverflowLabel("overflow");
1124        index.setInflowLabel("inflow");
1125        index.addRecord("!", null);
1126        index.addRecord("\u03B1", null); // GREEK SMALL LETTER ALPHA
1127        index.addRecord("\uab70", null); // CHEROKEE SMALL LETTER A
1128        AlphabeticIndex.Bucket<?> underflowBucket = null;
1129        AlphabeticIndex.Bucket<?> overflowBucket = null;
1130        AlphabeticIndex.Bucket<?> inflowBucket = null;
1131        for (AlphabeticIndex.Bucket<?> bucket : index) {
1132            switch (bucket.getLabelType()) {
1133                case UNDERFLOW:
1134                    assertNull("LabelType not null", underflowBucket);
1135                    underflowBucket = bucket;
1136                    break;
1137                case OVERFLOW:
1138                    assertNull("LabelType not null", overflowBucket);
1139                    overflowBucket = bucket;
1140                    break;
1141                case INFLOW:
1142                    assertNull("LabelType not null", inflowBucket);
1143                    inflowBucket = bucket;
1144                    break;
1145            }
1146        }
1147        assertNotNull("No bucket 'underflow'", underflowBucket);
1148        assertEquals("Wrong bucket label", "underflow", underflowBucket.getLabel());
1149        assertEquals("Wrong bucket label", "underflow", index.getUnderflowLabel());
1150        assertEquals("Bucket size not 1", 1, underflowBucket.size());
1151        assertNotNull("No bucket 'overflow'", overflowBucket);
1152        assertEquals("Wrong bucket label", "overflow", overflowBucket.getLabel());
1153        assertEquals("Wrong bucket label", "overflow", index.getOverflowLabel());
1154        assertEquals("Bucket size not 1", 1, overflowBucket.size());
1155        assertNotNull("No bucket 'inflow'", inflowBucket);
1156        assertEquals("Wrong bucket label", "inflow", inflowBucket.getLabel());
1157        assertEquals("Wrong bucket label", "inflow", index.getInflowLabel());
1158        assertEquals("Bucket size not 1", 1, inflowBucket.size());
1159    }
1160}
1161