1/* GENERATED SOURCE. DO NOT MODIFY. */
2// © 2016 and later: Unicode, Inc. and others.
3// License & terms of use: http://www.unicode.org/copyright.html#License
4/*
5 *******************************************************************************
6 * Copyright (C) 2008-2015, International Business Machines Corporation and
7 * others. All Rights Reserved.
8 *******************************************************************************
9 */
10package android.icu.dev.test.collator;
11import java.util.ArrayList;
12import java.util.Arrays;
13import java.util.Collection;
14import java.util.Iterator;
15import java.util.LinkedHashSet;
16import java.util.List;
17import java.util.Locale;
18import java.util.Set;
19import java.util.TreeSet;
20
21import org.junit.Test;
22
23import android.icu.dev.test.TestFmwk;
24import android.icu.dev.util.CollectionUtilities;
25import android.icu.impl.ICUDebug;
26import android.icu.impl.Row;
27import android.icu.impl.Row.R4;
28import android.icu.lang.UCharacter;
29import android.icu.lang.UProperty;
30import android.icu.lang.UScript;
31import android.icu.text.AlphabeticIndex;
32import android.icu.text.AlphabeticIndex.Bucket;
33import android.icu.text.AlphabeticIndex.Bucket.LabelType;
34import android.icu.text.AlphabeticIndex.ImmutableIndex;
35import android.icu.text.AlphabeticIndex.Record;
36import android.icu.text.Collator;
37import android.icu.text.Normalizer2;
38import android.icu.text.RawCollationKey;
39import android.icu.text.RuleBasedCollator;
40import android.icu.text.UTF16;
41import android.icu.text.UnicodeSet;
42import android.icu.util.ULocale;
43
44/**
45 * @author Mark Davis
46 */
47public class AlphabeticIndexTest extends TestFmwk {
48    /**
49     *
50     */
51    private static final String ARROW = "\u2192";
52    private static final boolean DEBUG = ICUDebug.enabled("alphabeticindex");
53
54    public static Set<String> KEY_LOCALES = new LinkedHashSet(Arrays.asList(
55            "en", "es", "de", "fr", "ja", "it", "tr", "pt", "zh", "nl",
56            "pl", "ar", "ru", "zh_Hant", "ko", "th", "sv", "fi", "da",
57            "he", "nb", "el", "hr", "bg", "sk", "lt", "vi", "lv", "sr",
58            "pt_PT", "ro", "hu", "cs", "id", "sl", "fil", "fa", "uk",
59            "ca", "hi", "et", "eu", "is", "sw", "ms", "bn", "am", "ta",
60            "te", "mr", "ur", "ml", "kn", "gu", "or"));
61    private String[][] localeAndIndexCharactersLists = new String[][] {
62            /* Arabic*/ {"ar", "\u0627:\u0628:\u062A:\u062B:\u062C:\u062D:\u062E:\u062F:\u0630:\u0631:\u0632:\u0633:\u0634:\u0635:\u0636:\u0637:\u0638:\u0639:\u063A:\u0641:\u0642:\u0643:\u0644:\u0645:\u0646:\u0647:\u0648:\u064A"},
63            /* Bulgarian*/  {"bg", "\u0410:\u0411:\u0412:\u0413:\u0414:\u0415:\u0416:\u0417:\u0418:\u0419:\u041A:\u041B:\u041C:\u041D:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u0428:\u0429:\u042E:\u042F"},
64            /* Catalan*/    {"ca", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
65            /* Czech*/  {"cs", "A:B:C:\u010C:D:E:F:G:H:CH:I:J:K:L:M:N:O:P:Q:R:\u0158:S:\u0160:T:U:V:W:X:Y:Z:\u017D"},
66            /* Danish*/ {"da", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C6:\u00D8:\u00C5"},
67            /* German*/ {"de", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
68            /* Greek*/  {"el", "\u0391:\u0392:\u0393:\u0394:\u0395:\u0396:\u0397:\u0398:\u0399:\u039A:\u039B:\u039C:\u039D:\u039E:\u039F:\u03A0:\u03A1:\u03A3:\u03A4:\u03A5:\u03A6:\u03A7:\u03A8:\u03A9"},
69            /* English*/    {"en", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
70            /* Spanish*/    {"es", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:\u00D1:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
71            /* Estonian*/   {"et", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:\u0160:Z:\u017D:T:U:V:\u00D5:\u00C4:\u00D6:\u00DC:X:Y"},
72            /* Basque*/ {"eu", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
73            /* Finnish*/    {"fi", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C5:\u00C4:\u00D6"},
74            /* Filipino*/   {"fil", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:\u00D1:Ng:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
75            /* French*/ {"fr", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
76            /* Hebrew*/ {"he", "\u05D0:\u05D1:\u05D2:\u05D3:\u05D4:\u05D5:\u05D6:\u05D7:\u05D8:\u05D9:\u05DB:\u05DC:\u05DE:\u05E0:\u05E1:\u05E2:\u05E4:\u05E6:\u05E7:\u05E8:\u05E9:\u05EA"},
77            /* Icelandic*/  {"is", "A:\u00C1:B:C:D:\u00D0:E:\u00C9:F:G:H:I:\u00CD:J:K:L:M:N:O:\u00D3:P:Q:R:S:T:U:\u00DA:V:W:X:Y:\u00DD:Z:\u00DE:\u00C6:\u00D6"},
78            /* Italian*/    {"it", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
79            /* Japanese*/   {"ja", "\u3042:\u304B:\u3055:\u305F:\u306A:\u306F:\u307E:\u3084:\u3089:\u308F"},
80            /* Korean*/ {"ko", "\u3131:\u3134:\u3137:\u3139:\u3141:\u3142:\u3145:\u3147:\u3148:\u314A:\u314B:\u314C:\u314D:\u314E"},
81            /* Lithuanian*/ {"lt", "A:B:C:\u010C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:\u0160:T:U:V:Z:\u017D"},
82            /* Latvian*/    {"lv", "A:B:C:\u010C:D:E:F:G:\u0122:H:I:J:K:\u0136:L:\u013B:M:N:\u0145:O:P:Q:R:S:\u0160:T:U:V:W:X:Z:\u017D"},
83            /* Norwegian Bokm\u00E5l*/  {"nb", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C6:\u00D8:\u00C5"},
84            /* Dutch*/  {"nl", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
85            /* Polish*/ {"pl", "A:\u0104:B:C:\u0106:D:E:\u0118:F:G:H:I:J:K:L:\u0141:M:N:\u0143:O:\u00D3:P:Q:R:S:\u015A:T:U:V:W:X:Y:Z:\u0179:\u017B"},
86            /* Portuguese*/ {"pt", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
87            /* Romanian*/   {"ro", "A:\u0102:\u00C2:B:C:D:E:F:G:H:I:\u00CE:J:K:L:M:N:O:P:Q:R:S:\u0218:T:\u021A:U:V:W:X:Y:Z"},
88            /* Russian*/    {"ru", "\u0410:\u0411:\u0412:\u0413:\u0414:\u0415:\u0416:\u0417:\u0418:\u0419:\u041A:\u041B:\u041C:\u041D:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u0428:\u0429:\u042B:\u042D:\u042E:\u042F"},
89            /* Slovak*/ {"sk", "A:\u00C4:B:C:\u010C:D:E:F:G:H:CH:I:J:K:L:M:N:O:\u00D4:P:Q:R:S:\u0160:T:U:V:W:X:Y:Z:\u017D"},
90            /* Slovenian*/  {"sl", "A:B:C:\u010C:\u0106:D:\u0110:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:\u0160:T:U:V:W:X:Y:Z:\u017D"},
91            /* Serbian*/    {"sr", "\u0410:\u0411:\u0412:\u0413:\u0414:\u0402:\u0415:\u0416:\u0417:\u0418:\u0408:\u041A:\u041B:\u0409:\u041C:\u041D:\u040A:\u041E:\u041F:\u0420:\u0421:\u0422:\u040B:\u0423:\u0424:\u0425:\u0426:\u0427:\u040F:\u0428"},
92            /* Swedish*/    {"sv", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C5:\u00C4:\u00D6"},
93            /* Turkish*/    {"tr", "A:B:C:\u00C7:D:E:F:G:H:I:\u0130:J:K:L:M:N:O:\u00D6:P:Q:R:S:\u015E:T:U:\u00DC:V:W:X:Y:Z"},
94            /* Ukrainian*/  {"uk", "\u0410:\u0411:\u0412:\u0413:\u0490:\u0414:\u0415:\u0404:\u0416:\u0417:\u0418:\u0406:\u0407:\u0419:\u041A:\u041B:\u041C:\u041D:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u0428:\u0429:\u042E:\u042F"},
95            /* Vietnamese*/ {"vi", "A:\u0102:\u00C2:B:C:D:\u0110:E:\u00CA:F:G:H:I:J:K:L:M:N:O:\u00D4:\u01A0:P:Q:R:S:T:U:\u01AF:V:W:X:Y:Z"},
96            /* Chinese*/    {"zh", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
97            /* Chinese (Traditional Han)*/  {"zh_Hant", "1\u5283:2\u5283:3\u5283:4\u5283:5\u5283:6\u5283:7\u5283:8\u5283:9\u5283:10\u5283:11\u5283:12\u5283:13\u5283:14\u5283:15\u5283:16\u5283:17\u5283:18\u5283:19\u5283:20\u5283:21\u5283:22\u5283:23\u5283:24\u5283:25\u5283:26\u5283:27\u5283:28\u5283:29\u5283:30\u5283:31\u5283:32\u5283:33\u5283:35\u5283:36\u5283:39\u5283:48\u5283"},
98
            // The entries below are commented out to make the test run faster. Later, make them run under extended testing.
100
101            //            /* Afrikaans*/  {"af", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
102            //            /* Akan*/   {"ak", "A:B:C:D:E:\u0190:F:G:H:I:J:K:L:M:N:O:\u0186:P:Q:R:S:T:U:V:W:X:Y:Z"},
103            //            /* Asu*/    {"asa", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
104            //            /* Azerbaijani*/    {"az", "A:B:C:\u00C7:D:E:\u018F:F:G:\u011E:H:X:I:\u0130:J:K:Q:L:M:N:O:\u00D6:P:R:S:\u015E:T:U:\u00DC:V:W:Y:Z"},
105            //            /* Belarusian*/ {"be", "\u0410:\u0411:\u0412:\u0413:\u0414:\u0415:\u0416:\u0417:\u0406:\u0419:\u041A:\u041B:\u041C:\u041D:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u0428:\u042B:\u042D:\u042E:\u042F"},
106            //            /* Bemba*/  {"bem", "A:B:C:E:F:G:I:J:K:L:M:N:O:P:S:T:U:W:Y"},
107            //            /* Bena*/   {"bez", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:Y:Z"},
108            //            /* Bambara*/    {"bm", "A:B:C:D:E:\u0190:F:G:H:I:J:K:L:M:N:\u019D:\u014A:O:\u0186:P:R:S:T:U:W:Y:Z"},
109            //            /* Tibetan*/    {"bo", "\u0F40:\u0F41:\u0F42:\u0F44:\u0F45:\u0F46:\u0F47:\u0F49:\u0F4F:\u0F50:\u0F51:\u0F53:\u0F54:\u0F55:\u0F56:\u0F58:\u0F59:\u0F5A:\u0F5B:\u0F5D:\u0F5E:\u0F5F:\u0F60:\u0F61:\u0F62:\u0F63:\u0F64:\u0F66:\u0F67:\u0F68"},
110            //            /* Chiga*/  {"cgg", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
111            //            /* Cherokee*/   {"chr", "\u13A0:\u13A6:\u13AD:\u13B3:\u13B9:\u13BE:\u13C6:\u13CC:\u13D3:\u13DC:\u13E3:\u13E9:\u13EF"},
112            //            /* Welsh*/  {"cy", "A:B:C:CH:D:E:F:FF:G:H:I:J:L:LL:M:N:O:P:PH:R:RH:S:T:TH:U:W:Y"},
113            //            /* Taita*/  {"dav", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
114            //            /* Embu*/   {"ebu", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
115            //            /* Ewe*/    {"ee", "A:B:C:D:\u0189:E:\u0190:F:\u0191:G:\u0194:H:I:J:K:L:M:N:\u014A:O:\u0186:P:Q:R:S:T:U:V:\u01B2:W:X:Y:Z"},
116            //            /* Esperanto*/  {"eo", "A:B:C:\u0108:D:E:F:G:\u011C:H:\u0124:I:J:\u0134:K:L:M:N:O:P:R:S:\u015C:T:U:\u016C:V:Z"},
117            //            /* Fulah*/  {"ff", "A:B:\u0181:C:D:\u018A:E:F:G:H:I:J:K:L:M:N:\u014A:O:P:R:S:T:U:W:Y:\u01B3"},
118            //            /* Faroese*/    {"fo", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C6:\u00D8"},
119            //            /* Gusii*/  {"guz", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
120            //            /* Hausa*/  {"ha", "A:B:\u0181:C:D:\u018A:E:F:G:H:I:J:K:\u0198:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
121            //            /* Igbo*/   {"ig", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
122            //            /* Machame*/    {"jmc", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
123            //            /* Kabyle*/ {"kab", "A:B:C:D:E:\u0190:F:G:\u0194:H:I:J:K:L:M:N:P:Q:R:S:T:U:W:X:Y:Z"},
124            //            /* Kamba*/  {"kam", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
125            //            /* Makonde*/    {"kde", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
126            //            /* Kabuverdianu*/   {"kea", "A:B:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:X:Z"},
127            //            /* Koyra Chiini*/   {"khq", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:\u019D:\u014A:O:P:Q:R:S:T:U:W:X:Y:Z"},
128            //            /* Kikuyu*/ {"ki", "A:B:C:D:E:G:H:I:J:K:M:N:O:R:T:U:W:Y"},
129            //            /* Kalenjin*/   {"kln", "A:B:C:D:E:G:H:I:J:K:L:M:N:O:P:R:S:T:U:W:Y"},
130            //            /* Langi*/  {"lag", "A:B:C:D:E:F:G:H:I:\u0197:J:K:L:M:N:O:P:Q:R:S:T:U:\u0244:V:W:X:Y:Z"},
131            //            /* Ganda*/  {"lg", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
132            //            /* Luo*/    {"luo", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y"},
133            //            /* Luyia*/  {"luy", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
134            //            /* Masai*/  {"mas", "A:B:C:D:E:\u0190:G:H:I:\u0197:J:K:L:M:N:\u014A:O:\u0186:P:R:S:T:U:\u0244:W:Y"},
135            //            /* Meru*/   {"mer", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
136            //            /* Morisyen*/   {"mfe", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:X:Y:Z"},
137            //            /* Malagasy*/   {"mg", "A:B:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:V:Y:Z"},
            // This should be the correct data. Commented out until it is fixed in the CLDR collation data.
139            // {"mk", "\u0410:\u0411:\u0412:\u0413:\u0403:\u0414:\u0415:\u0416:\u0417:\u0405:\u0418:\u0408:\u041A:\u040C:\u041B:\u0409:\u041C:\u041D:\u040A:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u040F:\u0428"},
140            //            /* Macedonian*/ {"mk", "\u0410:\u0411:\u0412:\u0413:\u0414:\u0403:\u0415:\u0416:\u0417:\u0405:\u0418:\u0408:\u041A:\u041B:\u0409:\u041C:\u041D:\u040A:\u041E:\u041F:\u0420:\u0421:\u0422:\u040C:\u0423:\u0424:\u0425:\u0426:\u0427:\u040F:\u0428"},
            // This should be the correct data. Commented out until it is fixed in the CLDR collation data.
142            // {"mt", "A:B:C:\u010A:D:E:F:\u0120:G:G\u0126:H:\u0126:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:\u017B:Z"},
143            //            /* Maltese*/    {"mt", "A:B:\u010A:C:D:E:F:\u0120:G:G\u0126:H:\u0126:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:\u017B:Z"},
144            //            /* Nama*/   {"naq", "A:B:C:D:E:F:G:H:I:K:M:N:O:P:Q:R:S:T:U:W:X:Y:Z"},
145            //            /* North Ndebele*/  {"nd", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:S:T:U:V:W:X:Y:Z"},
146            //            /* Norwegian Nynorsk*/  {"nn", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C6:\u00D8:\u00C5"},
147            //            /* Nyankole*/   {"nyn", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
148            //            /* Oromo*/  {"om", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
149            //            /* Romansh*/    {"rm", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
150            //            /* Rombo*/  {"rof", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
151            //            /* Kinyarwanda*/    {"rw", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
152            //            /* Rwa*/    {"rwk", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
153            //            /* Samburu*/    {"saq", "A:B:C:D:E:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y"},
154            //            /* Sena*/   {"seh", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
155            //            /* Koyraboro Senni*/    {"ses", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:\u019D:\u014A:O:P:Q:R:S:T:U:W:X:Y:Z"},
156            //            /* Sango*/  {"sg", "A:B:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
157            //            /* Tachelhit*/  {"shi", "A:B:C:D:E:\u0190:F:G:\u0194:H:I:J:K:L:M:N:Q:R:S:T:U:W:X:Y:Z"},
158            //            /* Tachelhit (Tifinagh)*/   {"shi_Tfng", "\u2D30:\u2D31:\u2D33:\u2D37:\u2D39:\u2D3B:\u2D3C:\u2D3D:\u2D40:\u2D43:\u2D44:\u2D45:\u2D47:\u2D49:\u2D4A:\u2D4D:\u2D4E:\u2D4F:\u2D53:\u2D54:\u2D55:\u2D56:\u2D59:\u2D5A:\u2D5B:\u2D5C:\u2D5F:\u2D61:\u2D62:\u2D63:\u2D65"},
159            //            /* Shona*/  {"sn", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
160            //            /* Teso*/   {"teo", "A:B:C:D:E:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:X:Y"},
161            //            /* Tonga*/  {"to", "A:B:C:D:E:F:G:H:\u02BB:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
162            //            /* Central Morocco Tamazight*/  {"tzm", "A:B:C:D:E:\u0190:F:G:\u0194:H:I:J:K:L:M:N:Q:R:S:T:U:W:X:Y:Z"},
163            //            /* Uzbek (Latin)*/  {"uz_Latn", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u02BF"},
164            //            /* Vunjo*/  {"vun", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"},
165            //            /* Soga*/   {"xog", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
166            //            /* Yoruba*/ {"yo", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
167
168    };
169
170//    public void TestAAKeyword() {
171//    ICUResourceBundle rb = (ICUResourceBundle) UResourceBundle.getBundleInstance(
172//            ICUResourceBundle.ICU_COLLATION_BASE_NAME, "zh");
173//    showBundle(rb, 0);
174//        String[] keywords = Collator.getKeywords();
175//        System.out.println(Arrays.asList(keywords));
176//        String locale = "zh";
177//        ULocale ulocale = new ULocale(locale);
178//        for (String keyword : keywords) {
179//            List<String> values = Arrays.asList(Collator.getKeywordValuesForLocale(keyword, ulocale, false));
180//            List<String> allValues = Arrays.asList(Collator.getKeywordValues(keyword));
181//            for (String value : allValues) {
182//                System.out.println(keyword + "=" + value);
183//                checkKeyword(locale, value, values.contains(value));
184//            }
185//        }
186//    }
187//
188//    private void checkKeyword(String locale, String collationValue, boolean shouldExist) {
189//        final ULocale base = new ULocale(locale);
190//        final ULocale desired = new ULocale(locale + "@collation=" + collationValue);
191//        Collator foo = Collator.getInstance(desired);
192//        ULocale actual = foo.getLocale(ULocale.ACTUAL_LOCALE);
193//        if (shouldExist) {
194//            assertEquals("actual should match desired", desired, actual);
195//        } else {
196//            assertEquals("actual should match base", base, actual);
197//        }
198//        int comp = foo.compare("a", "ā");
199//        assertEquals("should fall back to default for zh", -1, comp);
200//    }
201//
202//    /**
203//     * @param rb
204//     * @param i
205//     */
206//    private static void showBundle(UResourceBundle rb, int i) {
207//        for (String key : rb.keySet()) {
208//            System.out.print("\n" + Utility.repeat("  ", i) + key);
209//            UResourceBundle rb2 = rb.get(key);
210//            showBundle(rb2, i+1);
211//        }
212//    }
213
214
215    @Test
216    public void TestA() {
217        String[][] tests = {{"zh_Hant", "渡辺", "12劃"},
218                {"zh", "渡辺", "D"}
219                /*, "zh@collation=unihan", "ja@collation=unihan", "ko@collation=unihan"*/
220                };
221        for (String[] test : tests) {
222            AlphabeticIndex<Integer> alphabeticIndex = new AlphabeticIndex<Integer>(new ULocale(test[0]));
223            final String probe = test[1];
224            final String expectedLabel = test[2];
225            alphabeticIndex.addRecord(probe, 1);
226            List labels = alphabeticIndex.getBucketLabels();
227            logln(labels.toString());
228            Bucket<Integer> bucket = find(alphabeticIndex, probe);
229            assertEquals("locale " + test[0] + " name=" + probe + " in bucket",
230                    expectedLabel, bucket.getLabel());
231        }
232    }
233
234    private Bucket<Integer> find(AlphabeticIndex<Integer> alphabeticIndex, final String probe) {
235        for (Bucket<Integer> bucket : alphabeticIndex) {
236            for (Record<Integer> record : bucket) {
237                if (record.getName().equals(probe)) {
238                    return bucket;
239                }
240            }
241        }
242        return null;
243    }
244
245    @Test
246    public void TestFirstCharacters() {
247
248        AlphabeticIndex alphabeticIndex = new AlphabeticIndex(Locale.ENGLISH);
249        RuleBasedCollator collator = alphabeticIndex.getCollator();
250        collator.setStrength(Collator.IDENTICAL);
251        Collection<String> firsts = alphabeticIndex.getFirstCharactersInScripts();
252        // Verify that each script is represented exactly once.
253        // Exclude pseudo-scripts like Common (no letters).
254        // Exclude scripts like Braille and Sutton SignWriting
255        // because they only have symbols, not letters.
256        UnicodeSet missingScripts = new UnicodeSet(
257                "[^[:inherited:][:unknown:][:common:][:Braille:][:SignWriting:]]");
258        String last = "";
259        for (String index : firsts) {
260            if (collator.compare(last,index) >= 0) {
261                errln("Characters not in order: " + last + " !< " + index);
262            }
263            int script = getFirstRealScript(index);
264            if (script == UScript.UNKNOWN) { continue; }
265            UnicodeSet s = new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, script);
266            if (missingScripts.containsNone(s)) {
267                errln("2nd character in script: " + index + "\t" + new UnicodeSet(missingScripts).retainAll(s).toPattern(false));
268            }
269            missingScripts.removeAll(s);
270        }
271        if (missingScripts.size() != 0) {
272            String missingScriptNames = "";
273            UnicodeSet missingChars = new UnicodeSet(missingScripts);
274            for(;;) {
275                int c = missingChars.charAt(0);
276                if (c < 0) {
277                    break;
278                }
279                int script = UScript.getScript(c);
280                missingScriptNames += " " +
281                        UCharacter.getPropertyValueName(
282                                UProperty.SCRIPT, script, UProperty.NameChoice.SHORT);
283                missingChars.removeAll(new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, script));
284            }
285            errln("Missing character from:" + missingScriptNames + " -- " + missingScripts);
286        }
287    }
288
289    private static final int getFirstRealScript(CharSequence s) {
290        for (int i = 0; i < s.length();) {
291            int c = Character.codePointAt(s, i);
292            int script = UScript.getScript(c);
293            if (script != UScript.UNKNOWN && script != UScript.INHERITED && script != UScript.COMMON) {
294                return script;
295            }
296            i += Character.charCount(c);
297        }
298        return UScript.UNKNOWN;
299    }
300
301    @Test
302    public void TestBuckets() {
303        ULocale additionalLocale = ULocale.ENGLISH;
304
305        for (String[] pair : localeAndIndexCharactersLists) {
306            checkBuckets(pair[0], SimpleTests, additionalLocale, "E", "edgar", "Effron", "Effron");
307        }
308    }
309
310    @Test
311    public void TestEmpty() {
312        // just verify that it doesn't blow up.
313        Set<ULocale> locales = new LinkedHashSet<ULocale>();
314        locales.add(ULocale.ROOT);
315        locales.addAll(Arrays.asList(ULocale.getAvailableLocales()));
316        for (ULocale locale : locales) {
317            try {
318                AlphabeticIndex<String> alphabeticIndex = new AlphabeticIndex(locale);
319                alphabeticIndex.addRecord("hi", "HI");
320                for (Bucket<String> bucket : alphabeticIndex) {
321                    @SuppressWarnings("unused")
322                    LabelType labelType = bucket.getLabelType();
323                }
324            } catch (Exception e) {
325                errln("Exception when creating AlphabeticIndex for:\t" + locale.toLanguageTag());
326                errln(e.toString());
327            }
328        }
329    }
330
331    @Test
332    public void TestSetGetSpecialLabels() {
333        AlphabeticIndex index = new AlphabeticIndex(Locale.GERMAN).addLabels(new Locale("ru"));
334        index.setUnderflowLabel("__");
335        index.setInflowLabel("--");
336        index.setOverflowLabel("^^");
337        assertEquals("underflow label", "__", index.getUnderflowLabel());
338        assertEquals("inflow label", "--", index.getInflowLabel());
339        assertEquals("overflow label", "^^", index.getOverflowLabel());
340
341        ImmutableIndex ii = index.buildImmutableIndex();
342        assertEquals("0 -> underflow", "__", ii.getBucket(ii.getBucketIndex("0")).getLabel());
343        assertEquals("Ω -> inflow", "--", ii.getBucket(ii.getBucketIndex("Ω")).getLabel());
344        assertEquals("字 -> overflow", "^^", ii.getBucket(ii.getBucketIndex("字")).getLabel());
345    }
346
347    @Test
348    public void TestInflow() {
349        Object[][] tests = {
350                {0, ULocale.ENGLISH},
351                {0, ULocale.ENGLISH, new ULocale("el")},
352                {1, ULocale.ENGLISH, new ULocale("ru")},
353                {0, ULocale.ENGLISH, new ULocale("el"), new UnicodeSet("[\u2C80]"), new ULocale("ru")},
354                {0, ULocale.ENGLISH},
355                {2, ULocale.ENGLISH, new ULocale("ru"), ULocale.JAPANESE},
356        };
357        for (Object[] test : tests) {
358            int expected = (Integer) test[0];
359            AlphabeticIndex<Double> alphabeticIndex = new AlphabeticIndex((ULocale)test[1]);
360            for (int i = 2; i < test.length; ++i) {
361                if (test[i] instanceof ULocale) {
362                    alphabeticIndex.addLabels((ULocale)test[i]);
363                } else {
364                    alphabeticIndex.addLabels((UnicodeSet)test[i]);
365                }
366            }
367            Counter<AlphabeticIndex.Bucket.LabelType> counter = new Counter();
368            for (Bucket<Double> bucket : alphabeticIndex) {
369                LabelType labelType = bucket.getLabelType();
370                counter.add(labelType, 1);
371            }
372            String printList = Arrays.asList(test).toString();
373            assertEquals(LabelType.UNDERFLOW + "\t" + printList, 1, counter.get(LabelType.UNDERFLOW));
374            assertEquals(LabelType.INFLOW + "\t" + printList, expected, counter.get(LabelType.INFLOW));
375            if (expected != counter.get(LabelType.INFLOW)) {
376                // for debugging
377                AlphabeticIndex<Double> indexCharacters2 = new AlphabeticIndex((ULocale)test[1]);
378                for (int i = 2; i < test.length; ++i) {
379                    if (test[i] instanceof ULocale) {
380                        indexCharacters2.addLabels((ULocale)test[i]);
381                    } else {
382                        indexCharacters2.addLabels((UnicodeSet)test[i]);
383                    }
384                }
385                List<Bucket<Double>> buckets = CollectionUtilities.addAll(alphabeticIndex.iterator(), new ArrayList<Bucket<Double>>());
386                logln(buckets.toString());
387            }
388            assertEquals(LabelType.OVERFLOW + "\t" + printList, 1, counter.get(LabelType.OVERFLOW));
389        }
390    }
391
392    private void checkBuckets(String localeString, String[] test, ULocale additionalLocale, String testBucket, String... items) {
393        StringBuilder UI = new StringBuilder();
394        ULocale desiredLocale = new ULocale(localeString);
395
396        // Create a simple index where the values for the strings are Integers, and add the strings
397        AlphabeticIndex<Integer> index = new AlphabeticIndex<Integer>(desiredLocale).addLabels(additionalLocale);
398        int counter = 0;
399        Counter<String> itemCount = new Counter();
400        for (String item : test) {
401            index.addRecord(item, counter++);
402            itemCount.add(item, 1);
403        }
404        assertEquals("getRecordCount()", (int)itemCount.getTotal(), index.getRecordCount());  // code coverage
405
406        List<String> labels = index.getBucketLabels();
407        ImmutableIndex<Integer> immIndex = index.buildImmutableIndex();
408
409        logln(desiredLocale + "\t" + desiredLocale.getDisplayName(ULocale.ENGLISH) + " - " + desiredLocale.getDisplayName(desiredLocale) + "\t"
410                + index.getCollator().getLocale(ULocale.ACTUAL_LOCALE));
411        UI.setLength(0);
412        UI.append(desiredLocale + "\t");
413        boolean showAll = true;
414
415        // Show index at top. We could skip or gray out empty buckets
416        for (AlphabeticIndex.Bucket<Integer> bucket : index) {
417            if (showAll || bucket.size() != 0) {
418                showLabelAtTop(UI, bucket.getLabel());
419            }
420        }
421        logln(UI.toString());
422
423        // Show the buckets with their contents, skipping empty buckets
424        int bucketIndex = 0;
425        for (Bucket<Integer> bucket : index) {
426            assertEquals("bucket label vs. iterator",
427                    labels.get(bucketIndex), bucket.getLabel());
428            assertEquals("bucket label vs. immutable",
429                    labels.get(bucketIndex), immIndex.getBucket(bucketIndex).getLabel());
430            assertEquals("bucket label type vs. immutable",
431                    bucket.getLabelType(), immIndex.getBucket(bucketIndex).getLabelType());
432            for (Record<Integer> r : bucket) {
433                CharSequence name = r.getName();
434                assertEquals("getBucketIndex(" + name + ")",
435                        bucketIndex, index.getBucketIndex(name));
436                assertEquals("immutable getBucketIndex(" + name + ")",
437                        bucketIndex, immIndex.getBucketIndex(name));
438            }
439            if (bucket.getLabel().equals(testBucket)) {
440                Counter<String> keys = getKeys(bucket);
441                for (String item : items) {
442                    long globalCount = itemCount.get(item);
443                    long localeCount = keys.get(item);
444                    if (globalCount != localeCount) {
445                        errln("Error: in " + "'" + testBucket + "', '" + item + "' should have count "
446                                + globalCount + " but has count " + localeCount);
447                    }
448
449                }
450            }
451
452            if (bucket.size() != 0) {
453                showLabelInList(UI, bucket.getLabel());
454                for (AlphabeticIndex.Record<Integer> item : bucket) {
455                    showIndexedItem(UI, item.getName(), item.getData());
456                }
457                logln(UI.toString());
458            }
459            ++bucketIndex;
460        }
461        assertEquals("getBucketCount()", bucketIndex, index.getBucketCount());
462        assertEquals("immutable getBucketCount()", bucketIndex, immIndex.getBucketCount());
463
464        assertNull("immutable getBucket(-1)", immIndex.getBucket(-1));
465        assertNull("immutable getBucket(count)", immIndex.getBucket(bucketIndex));
466
467        for (Bucket<Integer> bucket : immIndex) {
468            assertEquals("immutable bucket size", 0, bucket.size());
469            assertFalse("immutable bucket iterator.hasNext()", bucket.iterator().hasNext());
470        }
471    }
472
473    public <T> void showIndex(AlphabeticIndex<T> index, boolean showEmpty) {
474        logln("Actual");
475        StringBuilder UI = new StringBuilder();
476        for (Bucket<T> bucket : index) {
477            if (showEmpty || bucket.size() != 0) {
478                showLabelInList(UI, bucket.getLabel());
479                for (Record<T> item : bucket) {
480                    showIndexedItem(UI, item.getName(), item.getData());
481                }
482                logln(UI.toString());
483            }
484        }
485    }
486
487    /**
488     * @param myBucketLabels
489     * @param myBucketContents
490     * @param b
491     */
492    private void showIndex(List<String> myBucketLabels, ArrayList<Set<R4<RawCollationKey, String, Integer, Double>>> myBucketContents, boolean showEmpty) {
493        logln("Alternative");
494        StringBuilder UI = new StringBuilder();
495
496        for (int i = 0; i < myBucketLabels.size(); ++i) {
497            Set<R4<RawCollationKey, String, Integer, Double>> bucket = myBucketContents.get(i);
498            if (!showEmpty && bucket.size() == 0) {
499                continue;
500            }
501            UI.setLength(0);
502            UI.append("*").append(myBucketLabels.get(i));
503            for (R4<RawCollationKey, String, Integer, Double> item : bucket) {
504                UI.append("\t ").append(item.get1().toString()).append(ARROW).append(item.get3().toString());
505            }
506            logln(UI.toString());
507        }
508    }
509
510    private void showLabelAtTop(StringBuilder buffer, String label) {
511        buffer.append(label + " ");
512    }
513
514    private <T> void showIndexedItem(StringBuilder buffer, CharSequence key, T value) {
515        buffer.append("\t " + key + ARROW + value);
516    }
517
518    private void showLabelInList(StringBuilder buffer, String label) {
519        buffer.setLength(0);
520        buffer.append(label);
521    }
522
523    private Counter<String> getKeys(AlphabeticIndex.Bucket<Integer> entry) {
524        Counter<String> keys = new Counter<String>();
525        for (AlphabeticIndex.Record x : entry) {
526            String key = x.getName().toString();
527            keys.add(key, 1);
528        }
529        return keys;
530    }
531
532    @Test
533    public void TestIndexCharactersList() {
534        for (String[] localeAndIndexCharacters : localeAndIndexCharactersLists) {
535            ULocale locale = new ULocale(localeAndIndexCharacters[0]);
536            String expectedIndexCharacters = "\u2026:" + localeAndIndexCharacters[1] + ":\u2026";
537            Collection<String> alphabeticIndex = new AlphabeticIndex(locale).getBucketLabels();
538
539            // Join the elements of the list to a string with delimiter ":"
540            StringBuilder sb = new StringBuilder();
541            Iterator<String> iter = alphabeticIndex.iterator();
542            while (iter.hasNext()) {
543                sb.append(iter.next());
544                if (!iter.hasNext()) {
545                    break;
546                }
547                sb.append(":");
548            }
549            String actualIndexCharacters = sb.toString();
550            if (!expectedIndexCharacters.equals(actualIndexCharacters)) {
551                errln("Test failed for locale " + localeAndIndexCharacters[0] +
552                        "\n  Expected = |" + expectedIndexCharacters + "|\n  actual   = |" + actualIndexCharacters + "|");
553            }
554        }
555    }
556
557    @Test
558    public void TestBasics() {
559        ULocale[] list = ULocale.getAvailableLocales();
560        // get keywords combinations
561        // don't bother with multiple combinations at this point
562        List keywords = new ArrayList();
563        keywords.add("");
564
565        String[] collationValues = Collator.getKeywordValues("collation");
566        for (int j = 0; j < collationValues.length; ++j) {
567            keywords.add("@collation=" + collationValues[j]);
568        }
569
570        for (int i = 0; i < list.length; ++i) {
571            for (Iterator it = keywords.iterator(); it.hasNext();) {
572                String collationValue = (String) it.next();
573                String localeString = list[i].toString();
574                if (!KEY_LOCALES.contains(localeString)) continue; // TODO change in exhaustive
575                ULocale locale = new ULocale(localeString + collationValue);
576                if (collationValue.length() > 0 && !Collator.getFunctionalEquivalent("collation", locale).equals(locale)) {
577                    //logln("Skipping " + locale);
578                    continue;
579                }
580
581                if (locale.getCountry().length() != 0) {
582                    continue;
583                }
584                boolean isUnihan = collationValue.contains("unihan");
585                AlphabeticIndex alphabeticIndex = new AlphabeticIndex(locale);
586                if (isUnihan) {
587                    // Unihan tailorings have a label per radical, and there are at least 214,
588                    // if not more when simplified radicals are distinguished.
589                    alphabeticIndex.setMaxLabelCount(500);
590                }
591                final Collection mainChars = alphabeticIndex.getBucketLabels();
592                String mainCharString = mainChars.toString();
593                if (mainCharString.length() > 500) {
594                    mainCharString = mainCharString.substring(0,500) + "...";
595                }
596                logln(mainChars.size() + "\t" + locale + "\t" + locale.getDisplayName(ULocale.ENGLISH));
597                logln("Index:\t" + mainCharString);
598                if (!isUnihan && mainChars.size() > 100) {
599                    errln("Index character set too large: " +
600                            locale + " [" + mainChars.size() + "]:\n    " + mainChars);
601                }
602            }
603        }
604    }
605
606    @Test
607    public void TestClientSupport() {
608        for (String localeString : new String[] {"zh"}) { // KEY_LOCALES, new String[] {"zh"}
609            ULocale ulocale = new ULocale(localeString);
610            AlphabeticIndex<Double> alphabeticIndex = new AlphabeticIndex<Double>(ulocale).addLabels(Locale.ENGLISH);
611            RuleBasedCollator collator = alphabeticIndex.getCollator();
612            String [][] tests;
613
614            if (!localeString.equals("zh") ) {
615                tests = new String[][] {SimpleTests};
616            } else {
617                tests = new String[][] {SimpleTests, hackPinyin, simplifiedNames};
618            }
619
620            for (String [] shortTest : tests) {
621                double testValue = 100;
622                alphabeticIndex.clearRecords();
623                for (String name : shortTest) {
624                    alphabeticIndex.addRecord(name, testValue++);
625                }
626
627                if (DEBUG) showIndex(alphabeticIndex, false);
628
629                // make my own copy
630                testValue = 100;
631                List<String> myBucketLabels = alphabeticIndex.getBucketLabels();
632                ArrayList<Set<R4<RawCollationKey, String, Integer, Double>>> myBucketContents = new ArrayList<Set<R4<RawCollationKey, String, Integer, Double>>>(myBucketLabels.size());
633                for (int i = 0; i < myBucketLabels.size(); ++i) {
634                    myBucketContents.add(new TreeSet<R4<RawCollationKey, String, Integer, Double>>());
635                }
636                for (String name : shortTest) {
637                    int bucketIndex = alphabeticIndex.getBucketIndex(name);
638                    if (bucketIndex > myBucketContents.size()) {
639                        alphabeticIndex.getBucketIndex(name); // call again for debugging
640                    }
641                    Set<R4<RawCollationKey, String, Integer, Double>> myBucket = myBucketContents.get(bucketIndex);
642                    RawCollationKey rawCollationKey = collator.getRawCollationKey(name, null);
643                    R4<RawCollationKey, String, Integer, Double> row = Row.of(rawCollationKey, name, name.length(), testValue++);
644                    myBucket.add(row);
645                }
646                if (DEBUG) showIndex(myBucketLabels, myBucketContents, false);
647
648                // now compare
649                int index = 0;
650                boolean gotError = false;
651                for (AlphabeticIndex.Bucket<Double> bucket : alphabeticIndex) {
652                    String bucketLabel = bucket.getLabel();
653                    String myLabel = myBucketLabels.get(index);
654                    if (!bucketLabel.equals(myLabel)) {
655                        gotError |= !assertEquals(ulocale + "\tBucket Labels (" + index + ")", bucketLabel, myLabel);
656                    }
657                    Set<R4<RawCollationKey, String, Integer, Double>> myBucket = myBucketContents.get(index);
658                    Iterator<R4<RawCollationKey, String, Integer, Double>> myBucketIterator = myBucket.iterator();
659                    int recordIndex = 0;
660                    for (Record<Double> record : bucket) {
661                        String myName = null;
662                        if (myBucketIterator.hasNext()) {
663                            R4<RawCollationKey, String, Integer, Double> myRecord = myBucketIterator.next();
664                            myName = myRecord.get1();
665                        }
666                        if (!record.getName().equals(myName)) {
667                            gotError |= !assertEquals(ulocale + "\t" + bucketLabel + "\t" + "Record Names (" + index + "." + recordIndex++ + ")", record.getName(), myName);
668                        }
669                    }
670                    while (myBucketIterator.hasNext()) {
671                        R4<RawCollationKey, String, Integer, Double> myRecord = myBucketIterator.next();
672                        String myName = myRecord.get1();
673                        gotError |= !assertEquals(ulocale + "\t" + bucketLabel + "\t" + "Record Names (" + index + "." + recordIndex++ + ")", null, myName);
674                    }
675                    index++;
676                }
677                if (gotError) {
678                    showIndex(myBucketLabels, myBucketContents, false);
679                    showIndex(alphabeticIndex, false);
680                }
681            }
682        }
683    }
684
685    @Test
686    public void TestFirstScriptCharacters() {
687        Collection<String> firstCharacters =
688                new AlphabeticIndex(ULocale.ENGLISH).getFirstCharactersInScripts();
689        Collection<String> expectedFirstCharacters = firstStringsInScript((RuleBasedCollator) Collator.getInstance(ULocale.ROOT));
690        Collection<String> diff = new TreeSet<String>(firstCharacters);
691        diff.removeAll(expectedFirstCharacters);
692        assertTrue("First Characters contains unexpected ones: " + diff, diff.isEmpty());
693        diff.clear();
694        diff.addAll(expectedFirstCharacters);
695        diff.removeAll(firstCharacters);
696        assertTrue("First Characters missing expected ones: " + diff, diff.isEmpty());
697    }
698
    /**
     * Candidate strings iterated by firstStringsInScript(): the set [:^nfcqc=no:]
     * with the Common, Inherited and Unknown scripts removed. The set is frozen
     * right after construction.
     */
    private static final UnicodeSet TO_TRY = new UnicodeSet("[[:^nfcqc=no:]-[:sc=Common:]-[:sc=Inherited:]-[:sc=Unknown:]]").freeze();
700
701    /**
702     * Returns a collection of all the "First" characters of scripts, according to the collation.
703     */
704    private static Collection<String> firstStringsInScript(RuleBasedCollator ruleBasedCollator) {
705        String[] results = new String[UScript.CODE_LIMIT];
706        for (String current : TO_TRY) {
707            if (ruleBasedCollator.compare(current, "a") < 0) { // we only want "real" script characters, not symbols.
708                continue;
709            }
710            int script = UScript.getScript(current.codePointAt(0));
711            if (results[script] == null) {
712                results[script] = current;
713            } else if (ruleBasedCollator.compare(current, results[script]) < 0) {
714                results[script] = current;
715            }
716        }
717
718        try {
719            UnicodeSet extras = new UnicodeSet();
720            UnicodeSet expansions = new UnicodeSet();
721            ruleBasedCollator.getContractionsAndExpansions(extras, expansions, true);
722            extras.addAll(expansions).removeAll(TO_TRY);
723            if (extras.size() != 0) {
724                Normalizer2 normalizer = Normalizer2.getNFKCInstance();
725                for (String current : extras) {
726                    if (!normalizer.isNormalized(current) || ruleBasedCollator.compare(current, "9") <= 0) {
727                        continue;
728                    }
729                    int script = getFirstRealScript(current);
730                    if (script == UScript.UNKNOWN && !isUnassignedBoundary(current)) { continue; }
731                    if (results[script] == null) {
732                        results[script] = current;
733                    } else if (ruleBasedCollator.compare(current, results[script]) < 0) {
734                        results[script] = current;
735                    }
736                }
737            }
738        } catch (Exception e) {
739        } // why have a checked exception???
740
741        // TODO: We should not test that we get the same strings, but that we
742        // get strings that sort primary-equal to those from the implementation.
743
744        Collection<String> result = new ArrayList<String>();
745        for (int i = 0; i < results.length; ++i) {
746            if (results[i] != null) {
747                result.add(results[i]);
748            }
749        }
750        return result;
751    }
752
753    private static final boolean isUnassignedBoundary(CharSequence s) {
754        // The root collator provides a script-first-primary boundary contraction
755        // for the unassigned-implicit range.
756        return s.charAt(0) == 0xfdd1 &&
757                UScript.getScript(Character.codePointAt(s, 1)) == UScript.UNKNOWN;
758    }
759
    /**
     * Placeholder test: the body holds only commented-out scratch code, so the
     * method executes nothing.
     */
    @Test
    public void TestZZZ() {
        //            int x = 3;
        //            AlphabeticIndex index = new AlphabeticIndex(ULocale.ENGLISH);
        //            UnicodeSet additions = new UnicodeSet();
        //            additions.add(0x410).add(0x415);  // Cyrillic
        //            // additions.add(0x391).add(0x393);     // Greek
        //            index.addLabels(additions);
        //            int lc = index.getLabels().size();
        //            List  labels = index.getLabels();
        //            System.out.println("Label Count = " + lc + "\t" + labels);
        //            System.out.println("Bucket Count =" + index.getBucketCount());
    }
773
    /**
     * Runs checkBuckets() for locale "zh" over simplifiedNames, passing
     * ULocale.ENGLISH, the label "W" and the string U+897F.
     * NOTE(review): checkBuckets() is defined outside this excerpt; presumably the
     * last two arguments name a bucket and an item expected in it - confirm.
     */
    @Test
    public void TestSimplified() {
        checkBuckets("zh", simplifiedNames, ULocale.ENGLISH, "W", "\u897f");
    }
778
    /**
     * Runs checkBuckets() for locale "zh_Hant" over traditionalNames, passing
     * ULocale.ENGLISH, the label U+4E9F and the string U+5357 U+9580.
     * NOTE(review): checkBuckets() is defined outside this excerpt; presumably the
     * last two arguments name a bucket and an item expected in it - confirm.
     */
    @Test
    public void TestTraditional() {
        checkBuckets("zh_Hant", traditionalNames, ULocale.ENGLISH, "\u4e9f", "\u5357\u9580");
    }
783
    /**
     * Mixed sample names used by TestClientSupport: symbols and digits, Latin names
     * (including dotted/dotless I variants), Greek and Han strings. Some entries
     * occur twice (e.g. "Davis"). Two further groups are kept commented out.
     */
    static final String[] SimpleTests = {
        "斎藤",
        "\u1f2d\u03c1\u03b1",
        "$", "\u00a3", "12", "2",
        "Davis", "Davis", "Abbot", "\u1D05avis", "Zach", "\u1D05avis", "\u01b5", "\u0130stanbul", "Istanbul", "istanbul", "\u0131stanbul",
        "\u00deor", "\u00c5berg", "\u00d6stlund",
        "\u1f2d\u03c1\u03b1", "\u1f08\u03b8\u03b7\u03bd\u1fb6",
        "\u0396\u03b5\u03cd\u03c2", "\u03a0\u03bf\u03c3\u03b5\u03b9\u03b4\u1f63\u03bd", "\u1f0d\u03b9\u03b4\u03b7\u03c2", "\u0394\u03b7\u03bc\u03ae\u03c4\u03b7\u03c1", "\u1f19\u03c3\u03c4\u03b9\u03ac",
        //"\u1f08\u03c0\u03cc\u03bb\u03bb\u03c9\u03bd", "\u1f0c\u03c1\u03c4\u03b5\u03bc\u03b9\u03c2", "\u1f19\u03c1\u03bc\u1f23\u03c2", "\u1f0c\u03c1\u03b7\u03c2", "\u1f08\u03c6\u03c1\u03bf\u03b4\u03af\u03c4\u03b7", "\u1f2d\u03c6\u03b1\u03b9\u03c3\u03c4\u03bf\u03c2", "\u0394\u03b9\u03cc\u03bd\u03c5\u03c3\u03bf\u03c2",
        "\u6589\u85e4", "\u4f50\u85e4", "\u9234\u6728", "\u9ad8\u6a4b", "\u7530\u4e2d", "\u6e21\u8fba", "\u4f0a\u85e4", "\u5c71\u672c", "\u4e2d\u6751", "\u5c0f\u6797", "\u658e\u85e4", "\u52a0\u85e4",
        //"\u5409\u7530", "\u5c71\u7530", "\u4f50\u3005\u6728", "\u5c71\u53e3", "\u677e\u672c", "\u4e95\u4e0a", "\u6728\u6751", "\u6797", "\u6e05\u6c34"
    };
796
    /**
     * Test names used by TestClientSupport for "zh" only. Each row holds one
     * lowercase Latin letter followed by three single Han characters; there are no
     * rows for i, u and v.
     * NOTE(review): presumably each row's Han characters have pinyin readings that
     * start with the row's letter - confirm.
     * The trailing "//" on each row is an empty comment.
     */
    static final String[] hackPinyin = {
        "a", "\u5416", "\u58ba", //
        "b", "\u516b", "\u62d4", "\u8500", //
        "c", "\u5693", "\u7938", "\u9e7e", //
        "d", "\u5491", "\u8fcf", "\u964a", //
        "e","\u59b8", "\u92e8", "\u834b", //
        "f", "\u53d1", "\u9197", "\u99a5", //
        "g", "\u7324", "\u91d3", "\u8142", //
        "h", "\u598e", "\u927f", "\u593b", //
        "j", "\u4e0c", "\u6785", "\u9d58", //
        "k", "\u5494", "\u958b", "\u7a52", //
        "l", "\u5783", "\u62c9", "\u9ba5", //
        "m", "\u5638", "\u9ebb", "\u65c0", //
        "n", "\u62ff", "\u80ad", "\u685b", //
        "o", "\u5662", "\u6bee", "\u8bb4", //
        "p", "\u5991", "\u8019", "\u8c31", //
        "q", "\u4e03", "\u6053", "\u7f56", //
        "r", "\u5465", "\u72aa", "\u6e03", //
        "s", "\u4ee8", "\u9491", "\u93c1", //
        "t", "\u4ed6", "\u9248", "\u67dd", //
        "w", "\u5c72", "\u5558", "\u5a7a", //
        "x", "\u5915", "\u5438", "\u6bbe", //
        "y", "\u4e2b", "\u82bd", "\u8574", //
        "z", "\u5e00", "\u707d", "\u5c0a"
    };
822
    /**
     * Name list used by TestSimplified (locale "zh") and by TestClientSupport:
     * four Latin names followed by one- and two-character Han strings.
     * NOTE(review): presumably Chinese surnames in simplified characters - confirm.
     */
    static final String[] simplifiedNames = {
        "Abbot", "Morton", "Zachary", "Williams", "\u8d75", "\u94b1", "\u5b59", "\u674e", "\u5468", "\u5434", "\u90d1", "\u738b", "\u51af", "\u9648", "\u696e", "\u536b", "\u848b", "\u6c88",
        "\u97e9", "\u6768", "\u6731", "\u79e6", "\u5c24", "\u8bb8", "\u4f55", "\u5415", "\u65bd", "\u5f20", "\u5b54", "\u66f9", "\u4e25", "\u534e", "\u91d1", "\u9b4f", "\u9676", "\u59dc", "\u621a", "\u8c22", "\u90b9",
        "\u55bb", "\u67cf", "\u6c34", "\u7aa6", "\u7ae0", "\u4e91", "\u82cf", "\u6f58", "\u845b", "\u595a", "\u8303", "\u5f6d", "\u90ce", "\u9c81", "\u97e6", "\u660c", "\u9a6c", "\u82d7", "\u51e4", "\u82b1", "\u65b9",
        "\u4fde", "\u4efb", "\u8881", "\u67f3", "\u9146", "\u9c8d", "\u53f2", "\u5510", "\u8d39", "\u5ec9", "\u5c91", "\u859b", "\u96f7", "\u8d3a", "\u502a", "\u6c64", "\u6ed5", "\u6bb7", "\u7f57", "\u6bd5", "\u90dd",
        "\u90ac", "\u5b89", "\u5e38", "\u4e50", "\u4e8e", "\u65f6", "\u5085", "\u76ae", "\u535e", "\u9f50", "\u5eb7", "\u4f0d", "\u4f59", "\u5143", "\u535c", "\u987e", "\u5b5f", "\u5e73", "\u9ec4", "\u548c", "\u7a46",
        "\u8427", "\u5c39", "\u59da", "\u90b5", "\u6e5b", "\u6c6a", "\u7941", "\u6bdb", "\u79b9", "\u72c4", "\u7c73", "\u8d1d", "\u660e", "\u81e7", "\u8ba1", "\u4f0f", "\u6210", "\u6234", "\u8c08", "\u5b8b", "\u8305",
        "\u5e9e", "\u718a", "\u7eaa", "\u8212", "\u5c48", "\u9879", "\u795d", "\u8463", "\u6881", "\u675c", "\u962e", "\u84dd", "\u95fd", "\u5e2d", "\u5b63", "\u9ebb", "\u5f3a", "\u8d3e", "\u8def", "\u5a04", "\u5371",
        "\u6c5f", "\u7ae5", "\u989c", "\u90ed", "\u6885", "\u76db", "\u6797", "\u5201", "\u953a", "\u5f90", "\u4e18", "\u9a86", "\u9ad8", "\u590f", "\u8521", "\u7530", "\u6a0a", "\u80e1", "\u51cc", "\u970d", "\u865e",
        "\u4e07", "\u652f", "\u67ef", "\u661d", "\u7ba1", "\u5362", "\u83ab", "\u7ecf", "\u623f", "\u88d8", "\u7f2a", "\u5e72", "\u89e3", "\u5e94", "\u5b97", "\u4e01", "\u5ba3", "\u8d32", "\u9093", "\u90c1", "\u5355",
        "\u676d", "\u6d2a", "\u5305", "\u8bf8", "\u5de6", "\u77f3", "\u5d14", "\u5409", "\u94ae", "\u9f9a", "\u7a0b", "\u5d47", "\u90a2", "\u6ed1", "\u88f4", "\u9646", "\u8363", "\u7fc1", "\u8340", "\u7f8a", "\u65bc",
        "\u60e0", "\u7504", "\u9eb9", "\u5bb6", "\u5c01", "\u82ae", "\u7fbf", "\u50a8", "\u9773", "\u6c72", "\u90b4", "\u7cdc", "\u677e", "\u4e95", "\u6bb5", "\u5bcc", "\u5deb", "\u4e4c", "\u7126", "\u5df4", "\u5f13",
        "\u7267", "\u9697", "\u5c71", "\u8c37", "\u8f66", "\u4faf", "\u5b93", "\u84ec", "\u5168", "\u90d7", "\u73ed", "\u4ef0", "\u79cb", "\u4ef2", "\u4f0a", "\u5bab", "\u5b81", "\u4ec7", "\u683e", "\u66b4", "\u7518",
        "\u659c", "\u5389", "\u620e", "\u7956", "\u6b66", "\u7b26", "\u5218", "\u666f", "\u8a79", "\u675f", "\u9f99", "\u53f6", "\u5e78", "\u53f8", "\u97f6", "\u90dc", "\u9ece", "\u84df", "\u8584", "\u5370", "\u5bbf",
        "\u767d", "\u6000", "\u84b2", "\u90b0", "\u4ece", "\u9102", "\u7d22", "\u54b8", "\u7c4d", "\u8d56", "\u5353", "\u853a", "\u5c60", "\u8499", "\u6c60", "\u4e54", "\u9634", "\u90c1", "\u80e5", "\u80fd", "\u82cd",
        "\u53cc", "\u95fb", "\u8398", "\u515a", "\u7fdf", "\u8c2d", "\u8d21", "\u52b3", "\u9004", "\u59ec", "\u7533", "\u6276", "\u5835", "\u5189", "\u5bb0", "\u90e6", "\u96cd", "\u90e4", "\u74a9", "\u6851", "\u6842",
        "\u6fee", "\u725b", "\u5bff", "\u901a", "\u8fb9", "\u6248", "\u71d5", "\u5180", "\u90cf", "\u6d66", "\u5c1a", "\u519c", "\u6e29", "\u522b", "\u5e84", "\u664f", "\u67f4", "\u77bf", "\u960e", "\u5145", "\u6155",
        "\u8fde", "\u8339", "\u4e60", "\u5ba6", "\u827e", "\u9c7c", "\u5bb9", "\u5411", "\u53e4", "\u6613", "\u614e", "\u6208", "\u5ed6", "\u5ebe", "\u7ec8", "\u66a8", "\u5c45", "\u8861", "\u6b65", "\u90fd", "\u803f",
        "\u6ee1", "\u5f18", "\u5321", "\u56fd", "\u6587", "\u5bc7", "\u5e7f", "\u7984", "\u9619", "\u4e1c", "\u6b27", "\u6bb3", "\u6c83", "\u5229", "\u851a", "\u8d8a", "\u5914", "\u9686", "\u5e08", "\u5de9", "\u538d",
        "\u8042", "\u6641", "\u52fe", "\u6556", "\u878d", "\u51b7", "\u8a3e", "\u8f9b", "\u961a", "\u90a3", "\u7b80", "\u9976", "\u7a7a", "\u66fe", "\u6bcb", "\u6c99", "\u4e5c", "\u517b", "\u97a0", "\u987b", "\u4e30",
        "\u5de2", "\u5173", "\u84af", "\u76f8", "\u67e5", "\u540e", "\u8346", "\u7ea2", "\u6e38", "\u7afa", "\u6743", "\u9011", "\u76d6", "\u76ca", "\u6853", "\u516c", "\u4e07\u4fdf", "\u53f8\u9a6c", "\u4e0a\u5b98", "\u6b27\u9633",
        "\u590f\u4faf", "\u8bf8\u845b", "\u95fb\u4eba", "\u4e1c\u65b9", "\u8d6b\u8fde", "\u7687\u752b", "\u5c09\u8fdf", "\u516c\u7f8a", "\u6fb9\u53f0", "\u516c\u51b6", "\u5b97\u653f", "\u6fee\u9633", "\u6df3\u4e8e", "\u5355\u4e8e", "\u592a\u53d4", "\u7533\u5c60", "\u516c\u5b59", "\u4ef2\u5b59",
        "\u8f69\u8f95", "\u4ee4\u72d0", "\u953a\u79bb", "\u5b87\u6587", "\u957f\u5b59", "\u6155\u5bb9", "\u9c9c\u4e8e", "\u95fe\u4e18", "\u53f8\u5f92", "\u53f8\u7a7a", "\u4e0c\u5b98", "\u53f8\u5bc7", "\u4ec9", "\u7763", "\u5b50\u8f66", "\u989b\u5b59", "\u7aef\u6728", "\u5deb\u9a6c",
        "\u516c\u897f", "\u6f06\u96d5", "\u4e50\u6b63", "\u58e4\u9a77", "\u516c\u826f", "\u62d3\u62d4", "\u5939\u8c37", "\u5bb0\u7236", "\u8c37\u6881", "\u664b", "\u695a", "\u960e", "\u6cd5", "\u6c5d", "\u9122", "\u6d82", "\u94a6", "\u6bb5\u5e72", "\u767e\u91cc",
        "\u4e1c\u90ed", "\u5357\u95e8", "\u547c\u5ef6", "\u5f52", "\u6d77", "\u7f8a\u820c", "\u5fae\u751f", "\u5cb3", "\u5e05", "\u7f11", "\u4ea2", "\u51b5", "\u540e", "\u6709", "\u7434", "\u6881\u4e18", "\u5de6\u4e18", "\u4e1c\u95e8", "\u897f\u95e8",
        "\u5546", "\u725f", "\u4f58", "\u4f74", "\u4f2f", "\u8d4f", "\u5357\u5bab", "\u58a8", "\u54c8", "\u8c2f", "\u7b2a", "\u5e74", "\u7231", "\u9633", "\u4f5f"
    };
850
    /**
     * Name list used by TestTraditional (locale "zh_Hant"): one literal Han
     * character, four Latin names, one- and two-character Han strings, and at the
     * end single characters from U+3401 upward including supplementary code points
     * written as surrogate pairs.
     * NOTE(review): presumably the traditional-character counterpart of
     * simplifiedNames - confirm.
     */
    static final String[] traditionalNames = { "丁", "Abbot", "Morton", "Zachary", "Williams", "\u8d99", "\u9322", "\u5b6b",
            "\u674e", "\u5468", "\u5433", "\u912d", "\u738b", "\u99ae", "\u9673", "\u696e", "\u885b", "\u8523",
            "\u6c88", "\u97d3", "\u694a", "\u6731", "\u79e6", "\u5c24", "\u8a31", "\u4f55", "\u5442", "\u65bd",
            "\u5f35", "\u5b54", "\u66f9", "\u56b4", "\u83ef", "\u91d1", "\u9b4f", "\u9676", "\u59dc", "\u621a",
            "\u8b1d", "\u9112", "\u55bb", "\u67cf", "\u6c34", "\u7ac7", "\u7ae0", "\u96f2", "\u8607", "\u6f58",
            "\u845b", "\u595a", "\u7bc4", "\u5f6d", "\u90ce", "\u9b6f", "\u97cb", "\u660c", "\u99ac", "\u82d7",
            "\u9cf3", "\u82b1", "\u65b9", "\u4fde", "\u4efb", "\u8881", "\u67f3", "\u9146", "\u9b91", "\u53f2",
            "\u5510", "\u8cbb", "\u5ec9", "\u5c91", "\u859b", "\u96f7", "\u8cc0", "\u502a", "\u6e6f", "\u6ed5",
            "\u6bb7", "\u7f85", "\u7562", "\u90dd", "\u9114", "\u5b89", "\u5e38", "\u6a02", "\u65bc", "\u6642",
            "\u5085", "\u76ae", "\u535e", "\u9f4a", "\u5eb7", "\u4f0d", "\u9918", "\u5143", "\u535c", "\u9867",
            "\u5b5f", "\u5e73", "\u9ec3", "\u548c", "\u7a46", "\u856d", "\u5c39", "\u59da", "\u90b5", "\u6e5b",
            "\u6c6a", "\u7941", "\u6bdb", "\u79b9", "\u72c4", "\u7c73", "\u8c9d", "\u660e", "\u81e7", "\u8a08",
            "\u4f0f", "\u6210", "\u6234", "\u8ac7", "\u5b8b", "\u8305", "\u9f90", "\u718a", "\u7d00", "\u8212",
            "\u5c48", "\u9805", "\u795d", "\u8463", "\u6881", "\u675c", "\u962e", "\u85cd", "\u95a9", "\u5e2d",
            "\u5b63", "\u9ebb", "\u5f37", "\u8cc8", "\u8def", "\u5a41", "\u5371", "\u6c5f", "\u7ae5", "\u984f",
            "\u90ed", "\u6885", "\u76db", "\u6797", "\u5201", "\u937e", "\u5f90", "\u4e18", "\u99f1", "\u9ad8",
            "\u590f", "\u8521", "\u7530", "\u6a0a", "\u80e1", "\u51cc", "\u970d", "\u865e", "\u842c", "\u652f",
            "\u67ef", "\u661d", "\u7ba1", "\u76e7", "\u83ab", "\u7d93", "\u623f", "\u88d8", "\u7e46", "\u5e79",
            "\u89e3", "\u61c9", "\u5b97", "\u4e01", "\u5ba3", "\u8cc1", "\u9127", "\u9b31", "\u55ae", "\u676d",
            "\u6d2a", "\u5305", "\u8af8", "\u5de6", "\u77f3", "\u5d14", "\u5409", "\u9215", "\u9f94", "\u7a0b",
            "\u5d47", "\u90a2", "\u6ed1", "\u88f4", "\u9678", "\u69ae", "\u7fc1", "\u8340", "\u7f8a", "\u65bc",
            "\u60e0", "\u7504", "\u9eb4", "\u5bb6", "\u5c01", "\u82ae", "\u7fbf", "\u5132", "\u9773", "\u6c72",
            "\u90b4", "\u7cdc", "\u677e", "\u4e95", "\u6bb5", "\u5bcc", "\u5deb", "\u70cf", "\u7126", "\u5df4",
            "\u5f13", "\u7267", "\u9697", "\u5c71", "\u8c37", "\u8eca", "\u4faf", "\u5b93", "\u84ec", "\u5168",
            "\u90d7", "\u73ed", "\u4ef0", "\u79cb", "\u4ef2", "\u4f0a", "\u5bae", "\u5be7", "\u4ec7", "\u6b12",
            "\u66b4", "\u7518", "\u659c", "\u53b2", "\u620e", "\u7956", "\u6b66", "\u7b26", "\u5289", "\u666f",
            "\u8a79", "\u675f", "\u9f8d", "\u8449", "\u5e78", "\u53f8", "\u97f6", "\u90dc", "\u9ece", "\u858a",
            "\u8584", "\u5370", "\u5bbf", "\u767d", "\u61f7", "\u84b2", "\u90b0", "\u5f9e", "\u9102", "\u7d22",
            "\u54b8", "\u7c4d", "\u8cf4", "\u5353", "\u85fa", "\u5c60", "\u8499", "\u6c60", "\u55ac", "\u9670",
            "\u9b31", "\u80e5", "\u80fd", "\u84bc", "\u96d9", "\u805e", "\u8398", "\u9ee8", "\u7fdf", "\u8b5a",
            "\u8ca2", "\u52de", "\u9004", "\u59ec", "\u7533", "\u6276", "\u5835", "\u5189", "\u5bb0", "\u9148",
            "\u96cd", "\u90e4", "\u74a9", "\u6851", "\u6842", "\u6fee", "\u725b", "\u58fd", "\u901a", "\u908a",
            "\u6248", "\u71d5", "\u5180", "\u90df", "\u6d66", "\u5c1a", "\u8fb2", "\u6eab", "\u5225", "\u838a",
            "\u664f", "\u67f4", "\u77bf", "\u95bb", "\u5145", "\u6155", "\u9023", "\u8339", "\u7fd2", "\u5ba6",
            "\u827e", "\u9b5a", "\u5bb9", "\u5411", "\u53e4", "\u6613", "\u614e", "\u6208", "\u5ed6", "\u5ebe",
            "\u7d42", "\u66a8", "\u5c45", "\u8861", "\u6b65", "\u90fd", "\u803f", "\u6eff", "\u5f18", "\u5321",
            "\u570b", "\u6587", "\u5bc7", "\u5ee3", "\u797f", "\u95d5", "\u6771", "\u6b50", "\u6bb3", "\u6c83",
            "\u5229", "\u851a", "\u8d8a", "\u5914", "\u9686", "\u5e2b", "\u978f", "\u5399", "\u8076", "\u6641",
            "\u52fe", "\u6556", "\u878d", "\u51b7", "\u8a3e", "\u8f9b", "\u95de", "\u90a3", "\u7c21", "\u9952",
            "\u7a7a", "\u66fe", "\u6bcb", "\u6c99", "\u4e5c", "\u990a", "\u97a0", "\u9808", "\u8c50", "\u5de2",
            "\u95dc", "\u84af", "\u76f8", "\u67e5", "\u5f8c", "\u834a", "\u7d05", "\u904a", "\u7afa", "\u6b0a",
            "\u9011", "\u84cb", "\u76ca", "\u6853", "\u516c", "\u4e07\u4fdf", "\u53f8\u99ac", "\u4e0a\u5b98",
            "\u6b50\u967d", "\u590f\u4faf", "\u8af8\u845b", "\u805e\u4eba", "\u6771\u65b9", "\u8d6b\u9023",
            "\u7687\u752b", "\u5c09\u9072", "\u516c\u7f8a", "\u6fb9\u53f0", "\u516c\u51b6", "\u5b97\u653f",
            "\u6fee\u967d", "\u6df3\u4e8e", "\u55ae\u4e8e", "\u592a\u53d4", "\u7533\u5c60", "\u516c\u5b6b",
            "\u4ef2\u5b6b", "\u8ed2\u8f45", "\u4ee4\u72d0", "\u937e\u96e2", "\u5b87\u6587", "\u9577\u5b6b",
            "\u6155\u5bb9", "\u9bae\u4e8e", "\u95ad\u4e18", "\u53f8\u5f92", "\u53f8\u7a7a", "\u4e0c\u5b98",
            "\u53f8\u5bc7", "\u4ec9", "\u7763", "\u5b50\u8eca", "\u9853\u5b6b", "\u7aef\u6728", "\u5deb\u99ac",
            "\u516c\u897f", "\u6f06\u96d5", "\u6a02\u6b63", "\u58e4\u99df", "\u516c\u826f", "\u62d3\u62d4",
            "\u593e\u8c37", "\u5bb0\u7236", "\u7a40\u6881", "\u6649", "\u695a", "\u95bb", "\u6cd5", "\u6c5d", "\u9122",
            "\u5857", "\u6b3d", "\u6bb5\u5e72", "\u767e\u91cc", "\u6771\u90ed", "\u5357\u9580", "\u547c\u5ef6",
            "\u6b78", "\u6d77", "\u7f8a\u820c", "\u5fae\u751f", "\u5cb3", "\u5e25", "\u7df1", "\u4ea2", "\u6cc1",
            "\u5f8c", "\u6709", "\u7434", "\u6881\u4e18", "\u5de6\u4e18", "\u6771\u9580", "\u897f\u9580", "\u5546",
            "\u725f", "\u4f58", "\u4f74", "\u4f2f", "\u8cde", "\u5357\u5bae", "\u58a8", "\u54c8", "\u8b59", "\u7b2a",
            "\u5e74", "\u611b", "\u967d", "\u4f5f", "\u3401", "\u3422", "\u3426", "\u3493", "\u34A5", "\u34A7",
            "\u34AA", "\u3536", "\u4A3B", "\u4E00", "\u4E01", "\u4E07", "\u4E0D", "\u4E17", "\u4E23", "\u4E26",
            "\u4E34", "\u4E82", "\u4EB8", "\u4EB9", "\u511F", "\u512D", "\u513D", "\u513E", "\u53B5", "\u56D4",
            "\u56D6", "\u7065", "\u7069", "\u706A", "\u7E9E", "\u9750", "\u9F49", "\u9F7E", "\u9F98", "\uD840\uDC35",
            "\uD840\uDC3D", "\uD840\uDC3E", "\uD840\uDC41", "\uD840\uDC46", "\uD840\uDC4C", "\uD840\uDC4E",
            "\uD840\uDC53", "\uD840\uDC55", "\uD840\uDC56", "\uD840\uDC5F", "\uD840\uDC60", "\uD840\uDC7A",
            "\uD840\uDC7B", "\uD840\uDCC8", "\uD840\uDD9E", "\uD840\uDD9F", "\uD840\uDDA0", "\uD840\uDDA1",
            "\uD841\uDD3B", "\uD842\uDCCA", "\uD842\uDCCB", "\uD842\uDD6C", "\uD842\uDE0B", "\uD842\uDE0C",
            "\uD842\uDED1", "\uD844\uDD9F", "\uD845\uDD19", "\uD845\uDD1A", "\uD846\uDD3B", "\uD84C\uDF5C",
            "\uD85A\uDDC4", "\uD85A\uDDC5", "\uD85C\uDD98", "\uD85E\uDCB1", "\uD861\uDC04", "\uD864\uDDD3",
            "\uD865\uDE63", "\uD869\uDCCA", "\uD86B\uDE9A", };
916
917    /**
918     * Test AlphabeticIndex vs. root with script reordering.
919     */
920    @Test
921    public void TestHaniFirst() {
922        RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
923        coll.setReorderCodes(UScript.HAN);
924        AlphabeticIndex index = new AlphabeticIndex(coll);
925        assertEquals("getBucketCount()", 1, index.getBucketCount());   // ... (underflow only)
926        index.addLabels(Locale.ENGLISH);
927        assertEquals("getBucketCount()", 28, index.getBucketCount());  // ... A-Z ...
928        int bucketIndex = index.getBucketIndex("\u897f");
929        assertEquals("getBucketIndex(U+897F)", 0, bucketIndex);  // underflow bucket
930        bucketIndex = index.getBucketIndex("i");
931        assertEquals("getBucketIndex(i)", 9, bucketIndex);
932        bucketIndex = index.getBucketIndex("\u03B1");
933        assertEquals("getBucketIndex(Greek alpha)", 27, bucketIndex);
934        // U+50005 is an unassigned code point which sorts at the end, independent of the Hani group.
935        bucketIndex = index.getBucketIndex(UTF16.valueOf(0x50005));
936        assertEquals("getBucketIndex(U+50005)", 27, bucketIndex);
937        bucketIndex = index.getBucketIndex("\uFFFF");
938        assertEquals("getBucketIndex(U+FFFF)", 27, bucketIndex);
939    }
940
941    /**
942     * Test AlphabeticIndex vs. Pinyin with script reordering.
943     */
944    @Test
945    public void TestPinyinFirst() {
946        RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(ULocale.CHINESE);
947        coll.setReorderCodes(UScript.HAN);
948        AlphabeticIndex index = new AlphabeticIndex(coll);
949        assertEquals("getBucketCount()", 28, index.getBucketCount());   // ... A-Z ...
950        index.addLabels(Locale.CHINESE);
951        assertEquals("getBucketCount()", 28, index.getBucketCount());  // ... A-Z ...
952        int bucketIndex = index.getBucketIndex("\u897f");
953        assertEquals("getBucketIndex(U+897F)", 'X' - 'A' + 1, bucketIndex);
954        bucketIndex = index.getBucketIndex("i");
955        assertEquals("getBucketIndex(i)", 9, bucketIndex);
956        bucketIndex = index.getBucketIndex("\u03B1");
957        assertEquals("getBucketIndex(Greek alpha)", 27, bucketIndex);
958        // U+50005 is an unassigned code point which sorts at the end, independent of the Hani group.
959        bucketIndex = index.getBucketIndex(UTF16.valueOf(0x50005));
960        assertEquals("getBucketIndex(U+50005)", 27, bucketIndex);
961        bucketIndex = index.getBucketIndex("\uFFFF");
962        assertEquals("getBucketIndex(U+FFFF)", 27, bucketIndex);
963    }
964
965    /**
966     * Test labels with multiple primary weights.
967     */
968    @Test
969    public void TestSchSt() {
970        AlphabeticIndex index = new AlphabeticIndex(ULocale.GERMAN);
971        index.addLabels(new UnicodeSet("[Æ{Sch*}{St*}]"));
972        // ... A Æ B-R S Sch St T-Z ...
973        ImmutableIndex immIndex = index.buildImmutableIndex();
974        assertEquals("getBucketCount()", 31, index.getBucketCount());
975        assertEquals("immutable getBucketCount()", 31, immIndex.getBucketCount());
976        String[][] testCases = new String[][] {
977            // name, bucket index, bucket label
978            { "Adelbert", "1", "A" },
979            { "Afrika", "1", "A" },
980            { "Æsculap", "2", "Æ" },
981            { "Aesthet", "2", "Æ" },
982            { "Berlin", "3", "B" },
983            { "Rilke", "19", "R" },
984            { "Sacher", "20", "S" },
985            { "Seiler", "20", "S" },
986            { "Sultan", "20", "S" },
987            { "Schiller", "21", "Sch" },
988            { "Steiff", "22", "St" },
989            { "Thomas", "23", "T" }
990        };
991        List<String> labels = index.getBucketLabels();
992        for (String[] testCase : testCases) {
993            String name = testCase[0];
994            int bucketIndex = Integer.valueOf(testCase[1]);
995            String label = testCase[2];
996            String msg = "getBucketIndex(" + name + ")";
997            assertEquals(msg, bucketIndex, index.getBucketIndex(name));
998            msg = "immutable " + msg;
999            assertEquals(msg, bucketIndex, immIndex.getBucketIndex(name));
1000            msg = "bucket label (" + name + ")";
1001            assertEquals(msg, label, labels.get(index.getBucketIndex(name)));
1002            msg = "immutable " + msg;
1003            assertEquals(msg, label, immIndex.getBucket(bucketIndex).getLabel());
1004        }
1005    }
1006
1007    /**
1008     * With no real labels, there should be only the underflow label.
1009     */
1010    @Test
1011    public void TestNoLabels() {
1012        RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
1013        AlphabeticIndex<Integer> index = new AlphabeticIndex<Integer>(coll);
1014        index.addRecord("\u897f", 0);
1015        index.addRecord("i", 0);
1016        index.addRecord("\u03B1", 0);
1017        assertEquals("getRecordCount()", 3, index.getRecordCount());  // code coverage
1018        assertEquals("getBucketCount()", 1, index.getBucketCount());  // ...
1019        Bucket<Integer> bucket = index.iterator().next();
1020        assertEquals("underflow label type", LabelType.UNDERFLOW, bucket.getLabelType());
1021        assertEquals("all records in the underflow bucket", 3, bucket.size());
1022    }
1023
1024    /**
1025     * Test with the Bopomofo-phonetic tailoring.
1026     */
1027    @Test
1028    public void TestChineseZhuyin() {
1029        AlphabeticIndex index = new AlphabeticIndex(ULocale.forLanguageTag("zh-u-co-zhuyin"));
1030        ImmutableIndex immIndex = index.buildImmutableIndex();
1031        assertEquals("getBucketCount()", 38, immIndex.getBucketCount());  // ... ㄅ ㄆ ㄇ ㄈ ㄉ -- ㄩ ...
1032        assertEquals("label 1", "ㄅ", immIndex.getBucket(1).getLabel());
1033        assertEquals("label 2", "ㄆ", immIndex.getBucket(2).getLabel());
1034        assertEquals("label 3", "ㄇ", immIndex.getBucket(3).getLabel());
1035        assertEquals("label 4", "ㄈ", immIndex.getBucket(4).getLabel());
1036        assertEquals("label 5", "ㄉ", immIndex.getBucket(5).getLabel());
1037    }
1038
1039    @Test
1040    public void TestJapaneseKanji() {
1041        AlphabeticIndex index = new AlphabeticIndex(ULocale.JAPANESE);
1042        AlphabeticIndex.ImmutableIndex immIndex = index.buildImmutableIndex();
1043        // There are no index characters for Kanji in the Japanese standard collator.
1044        // They should all go into the overflow bucket.
1045        final int[] kanji = { 0x4E9C, 0x95C7, 0x4E00, 0x58F1 };
1046        int overflowIndex = immIndex.getBucketCount() - 1;
1047        for(int i = 0; i < kanji.length; ++i) {
1048            String msg = String.format("kanji[%d]=U+%04X in overflow bucket", i, kanji[i]);
1049            assertEquals(msg, overflowIndex, immIndex.getBucketIndex(UTF16.valueOf(kanji[i])));
1050        }
1051    }
1052
1053    @Test
1054    public void TestFrozenCollator() {
1055        // Ticket #9472
1056        RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(new ULocale("da"));
1057        coll.setStrength(Collator.IDENTICAL);
1058        coll.freeze();
1059        // The AlphabeticIndex constructor used to throw an exception
1060        // because it cloned the collator (which preserves frozenness)
1061        // and set the clone's strength to PRIMARY.
1062        AlphabeticIndex index = new AlphabeticIndex(coll);
1063        assertEquals("same strength as input Collator",
1064                Collator.IDENTICAL, index.getCollator().getStrength());
1065    }
1066
1067    @Test
1068    public void TestChineseUnihan() {
1069        AlphabeticIndex index = new AlphabeticIndex(new ULocale("zh-u-co-unihan"));
1070        index.setMaxLabelCount(500);  // ICU 54 default is 99.
1071        assertEquals("getMaxLabelCount()", 500, index.getMaxLabelCount());  // code coverage
1072        AlphabeticIndex.ImmutableIndex immIndex = index.buildImmutableIndex();
1073        int bucketCount = immIndex.getBucketCount();
1074        if(bucketCount < 216) {
1075            // There should be at least an underflow and overflow label,
1076            // and one for each of 214 radicals,
1077            // and maybe additional labels for simplified radicals.
1078            // (ICU4C: dataerrln(), prints only a warning if the data is missing)
1079            errln("too few buckets/labels for Chinese/unihan: " + bucketCount +
1080                    " (is zh/unihan data available?)");
1081            return;
1082        } else {
1083            logln("Chinese/unihan has " + bucketCount + " buckets/labels");
1084        }
1085        // bucketIndex = radical number, adjusted for simplified radicals in lower buckets.
1086        int bucketIndex = index.getBucketIndex("\u4e5d");
1087        assertEquals("getBucketIndex(U+4E5D)", 5, bucketIndex);
1088        // radical 100, and there is a 90' since Unicode 8
1089        bucketIndex = index.getBucketIndex("\u7527");
1090        assertEquals("getBucketIndex(U+7527)", 101, bucketIndex);
1091    }
1092
1093    @Test
1094    public void testAddLabels_Locale() {
1095        AlphabeticIndex<?> ulocaleIndex = new AlphabeticIndex<String>(ULocale.CANADA);
1096        AlphabeticIndex<?> localeIndex = new AlphabeticIndex<String>(Locale.CANADA);
1097        ulocaleIndex.addLabels(ULocale.SIMPLIFIED_CHINESE);
1098        localeIndex.addLabels(Locale.SIMPLIFIED_CHINESE);
1099        assertEquals("getBucketLables() results of ulocaleIndex and localeIndex differ",
1100                ulocaleIndex.getBucketLabels(), localeIndex.getBucketLabels());
1101    }
1102
1103    @Test
1104    public void testGetRecordCount_empty() {
1105        assertEquals("Record count of empty index not 0", 0,
1106                new AlphabeticIndex<String>(ULocale.CANADA).getRecordCount());
1107    }
1108
1109    @Test
1110    public void testGetRecordCount_withRecords() {
1111        assertEquals("Record count of index with one record not 1", 1,
1112                new AlphabeticIndex<String>(ULocale.CANADA).addRecord("foo", null).getRecordCount());
1113    }
1114
1115    /**
1116     * Check that setUnderflowLabel/setOverflowLabel/setInflowLabel correctly influence the name of
1117     * generated labels.
1118     */
1119    @Test
1120    public void testFlowLabels() {
1121        AlphabeticIndex<?> index = new AlphabeticIndex<String>(ULocale.ENGLISH)
1122                .addLabels(ULocale.forLanguageTag("ru"));
1123        index.setUnderflowLabel("underflow");
1124        index.setOverflowLabel("overflow");
1125        index.setInflowLabel("inflow");
1126        index.addRecord("!", null);
1127        index.addRecord("\u03B1", null); // GREEK SMALL LETTER ALPHA
1128        index.addRecord("\uab70", null); // CHEROKEE SMALL LETTER A
1129        AlphabeticIndex.Bucket<?> underflowBucket = null;
1130        AlphabeticIndex.Bucket<?> overflowBucket = null;
1131        AlphabeticIndex.Bucket<?> inflowBucket = null;
1132        for (AlphabeticIndex.Bucket<?> bucket : index) {
1133            switch (bucket.getLabelType()) {
1134                case UNDERFLOW:
1135                    assertNull("LabelType not null", underflowBucket);
1136                    underflowBucket = bucket;
1137                    break;
1138                case OVERFLOW:
1139                    assertNull("LabelType not null", overflowBucket);
1140                    overflowBucket = bucket;
1141                    break;
1142                case INFLOW:
1143                    assertNull("LabelType not null", inflowBucket);
1144                    inflowBucket = bucket;
1145                    break;
1146            }
1147        }
1148        assertNotNull("No bucket 'underflow'", underflowBucket);
1149        assertEquals("Wrong bucket label", "underflow", underflowBucket.getLabel());
1150        assertEquals("Wrong bucket label", "underflow", index.getUnderflowLabel());
1151        assertEquals("Bucket size not 1", 1, underflowBucket.size());
1152        assertNotNull("No bucket 'overflow'", overflowBucket);
1153        assertEquals("Wrong bucket label", "overflow", overflowBucket.getLabel());
1154        assertEquals("Wrong bucket label", "overflow", index.getOverflowLabel());
1155        assertEquals("Bucket size not 1", 1, overflowBucket.size());
1156        assertNotNull("No bucket 'inflow'", inflowBucket);
1157        assertEquals("Wrong bucket label", "inflow", inflowBucket.getLabel());
1158        assertEquals("Wrong bucket label", "inflow", index.getInflowLabel());
1159        assertEquals("Bucket size not 1", 1, inflowBucket.size());
1160    }
1161}
1162