1/* GENERATED SOURCE. DO NOT MODIFY. */ 2// © 2016 and later: Unicode, Inc. and others. 3// License & terms of use: http://www.unicode.org/copyright.html#License 4/* 5 ******************************************************************************* 6 * Copyright (C) 2008-2015, International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ******************************************************************************* 9 */ 10package android.icu.dev.test.collator; 11import java.util.ArrayList; 12import java.util.Arrays; 13import java.util.Collection; 14import java.util.Iterator; 15import java.util.LinkedHashSet; 16import java.util.List; 17import java.util.Locale; 18import java.util.Set; 19import java.util.TreeSet; 20 21import org.junit.Test; 22 23import android.icu.dev.test.TestFmwk; 24import android.icu.dev.util.CollectionUtilities; 25import android.icu.impl.ICUDebug; 26import android.icu.impl.Row; 27import android.icu.impl.Row.R4; 28import android.icu.lang.UCharacter; 29import android.icu.lang.UProperty; 30import android.icu.lang.UScript; 31import android.icu.text.AlphabeticIndex; 32import android.icu.text.AlphabeticIndex.Bucket; 33import android.icu.text.AlphabeticIndex.Bucket.LabelType; 34import android.icu.text.AlphabeticIndex.ImmutableIndex; 35import android.icu.text.AlphabeticIndex.Record; 36import android.icu.text.Collator; 37import android.icu.text.Normalizer2; 38import android.icu.text.RawCollationKey; 39import android.icu.text.RuleBasedCollator; 40import android.icu.text.UTF16; 41import android.icu.text.UnicodeSet; 42import android.icu.util.ULocale; 43 44/** 45 * @author Mark Davis 46 */ 47public class AlphabeticIndexTest extends TestFmwk { 48 /** 49 * 50 */ 51 private static final String ARROW = "\u2192"; 52 private static final boolean DEBUG = ICUDebug.enabled("alphabeticindex"); 53 54 public static Set<String> KEY_LOCALES = new LinkedHashSet(Arrays.asList( 55 "en", "es", "de", "fr", "ja", "it", "tr", "pt", "zh", "nl", 56 "pl", "ar", "ru", "zh_Hant", "ko", "th", "sv", "fi", "da", 57 "he", "nb", "el", "hr", "bg", "sk", "lt", "vi", "lv", "sr", 58 "pt_PT", "ro", "hu", "cs", "id", "sl", "fil", "fa", "uk", 59 "ca", "hi", "et", "eu", "is", "sw", "ms", "bn", "am", "ta", 60 "te", "mr", "ur", "ml", "kn", "gu", "or")); 61 private String[][] localeAndIndexCharactersLists = new String[][] { 62 /* Arabic*/ {"ar", "\u0627:\u0628:\u062A:\u062B:\u062C:\u062D:\u062E:\u062F:\u0630:\u0631:\u0632:\u0633:\u0634:\u0635:\u0636:\u0637:\u0638:\u0639:\u063A:\u0641:\u0642:\u0643:\u0644:\u0645:\u0646:\u0647:\u0648:\u064A"}, 63 /* Bulgarian*/ {"bg", "\u0410:\u0411:\u0412:\u0413:\u0414:\u0415:\u0416:\u0417:\u0418:\u0419:\u041A:\u041B:\u041C:\u041D:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u0428:\u0429:\u042E:\u042F"}, 64 /* Catalan*/ {"ca", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 65 /* Czech*/ {"cs", "A:B:C:\u010C:D:E:F:G:H:CH:I:J:K:L:M:N:O:P:Q:R:\u0158:S:\u0160:T:U:V:W:X:Y:Z:\u017D"}, 66 /* Danish*/ {"da", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C6:\u00D8:\u00C5"}, 67 /* German*/ {"de", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 68 /* Greek*/ {"el", "\u0391:\u0392:\u0393:\u0394:\u0395:\u0396:\u0397:\u0398:\u0399:\u039A:\u039B:\u039C:\u039D:\u039E:\u039F:\u03A0:\u03A1:\u03A3:\u03A4:\u03A5:\u03A6:\u03A7:\u03A8:\u03A9"}, 69 /* English*/ {"en", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 70 /* Spanish*/ {"es", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:\u00D1:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 71 /* Estonian*/ {"et", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:\u0160:Z:\u017D:T:U:V:\u00D5:\u00C4:\u00D6:\u00DC:X:Y"}, 72 /* Basque*/ {"eu", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 73 /* Finnish*/ {"fi", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C5:\u00C4:\u00D6"}, 74 /* Filipino*/ {"fil", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:\u00D1:Ng:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 75 /* French*/ {"fr", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 76 /* Hebrew*/ {"he", "\u05D0:\u05D1:\u05D2:\u05D3:\u05D4:\u05D5:\u05D6:\u05D7:\u05D8:\u05D9:\u05DB:\u05DC:\u05DE:\u05E0:\u05E1:\u05E2:\u05E4:\u05E6:\u05E7:\u05E8:\u05E9:\u05EA"}, 77 /* Icelandic*/ {"is", "A:\u00C1:B:C:D:\u00D0:E:\u00C9:F:G:H:I:\u00CD:J:K:L:M:N:O:\u00D3:P:Q:R:S:T:U:\u00DA:V:W:X:Y:\u00DD:Z:\u00DE:\u00C6:\u00D6"}, 78 /* Italian*/ {"it", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 79 /* Japanese*/ {"ja", "\u3042:\u304B:\u3055:\u305F:\u306A:\u306F:\u307E:\u3084:\u3089:\u308F"}, 80 /* Korean*/ {"ko", "\u3131:\u3134:\u3137:\u3139:\u3141:\u3142:\u3145:\u3147:\u3148:\u314A:\u314B:\u314C:\u314D:\u314E"}, 81 /* Lithuanian*/ {"lt", "A:B:C:\u010C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:\u0160:T:U:V:Z:\u017D"}, 82 /* Latvian*/ {"lv", "A:B:C:\u010C:D:E:F:G:\u0122:H:I:J:K:\u0136:L:\u013B:M:N:\u0145:O:P:Q:R:S:\u0160:T:U:V:W:X:Z:\u017D"}, 83 /* Norwegian Bokm\u00E5l*/ {"nb", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C6:\u00D8:\u00C5"}, 84 /* Dutch*/ {"nl", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 85 /* Polish*/ {"pl", "A:\u0104:B:C:\u0106:D:E:\u0118:F:G:H:I:J:K:L:\u0141:M:N:\u0143:O:\u00D3:P:Q:R:S:\u015A:T:U:V:W:X:Y:Z:\u0179:\u017B"}, 86 /* Portuguese*/ {"pt", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 87 /* Romanian*/ {"ro", "A:\u0102:\u00C2:B:C:D:E:F:G:H:I:\u00CE:J:K:L:M:N:O:P:Q:R:S:\u0218:T:\u021A:U:V:W:X:Y:Z"}, 88 /* Russian*/ {"ru", "\u0410:\u0411:\u0412:\u0413:\u0414:\u0415:\u0416:\u0417:\u0418:\u0419:\u041A:\u041B:\u041C:\u041D:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u0428:\u0429:\u042B:\u042D:\u042E:\u042F"}, 89 /* Slovak*/ {"sk", "A:\u00C4:B:C:\u010C:D:E:F:G:H:CH:I:J:K:L:M:N:O:\u00D4:P:Q:R:S:\u0160:T:U:V:W:X:Y:Z:\u017D"}, 90 /* Slovenian*/ {"sl", "A:B:C:\u010C:\u0106:D:\u0110:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:\u0160:T:U:V:W:X:Y:Z:\u017D"}, 91 /* Serbian*/ {"sr", "\u0410:\u0411:\u0412:\u0413:\u0414:\u0402:\u0415:\u0416:\u0417:\u0418:\u0408:\u041A:\u041B:\u0409:\u041C:\u041D:\u040A:\u041E:\u041F:\u0420:\u0421:\u0422:\u040B:\u0423:\u0424:\u0425:\u0426:\u0427:\u040F:\u0428"}, 92 /* Swedish*/ {"sv", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C5:\u00C4:\u00D6"}, 93 /* Turkish*/ {"tr", "A:B:C:\u00C7:D:E:F:G:H:I:\u0130:J:K:L:M:N:O:\u00D6:P:Q:R:S:\u015E:T:U:\u00DC:V:W:X:Y:Z"}, 94 /* Ukrainian*/ {"uk", "\u0410:\u0411:\u0412:\u0413:\u0490:\u0414:\u0415:\u0404:\u0416:\u0417:\u0418:\u0406:\u0407:\u0419:\u041A:\u041B:\u041C:\u041D:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u0428:\u0429:\u042E:\u042F"}, 95 /* Vietnamese*/ {"vi", "A:\u0102:\u00C2:B:C:D:\u0110:E:\u00CA:F:G:H:I:J:K:L:M:N:O:\u00D4:\u01A0:P:Q:R:S:T:U:\u01AF:V:W:X:Y:Z"}, 96 /* Chinese*/ {"zh", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 97 /* Chinese (Traditional Han)*/ {"zh_Hant", "1\u5283:2\u5283:3\u5283:4\u5283:5\u5283:6\u5283:7\u5283:8\u5283:9\u5283:10\u5283:11\u5283:12\u5283:13\u5283:14\u5283:15\u5283:16\u5283:17\u5283:18\u5283:19\u5283:20\u5283:21\u5283:22\u5283:23\u5283:24\u5283:25\u5283:26\u5283:27\u5283:28\u5283:29\u5283:30\u5283:31\u5283:32\u5283:33\u5283:35\u5283:36\u5283:39\u5283:48\u5283"}, 98 99 // Comment these out to make the test run faster. Later, make these run under extended 100 101 // /* Afrikaans*/ {"af", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 102 // /* Akan*/ {"ak", "A:B:C:D:E:\u0190:F:G:H:I:J:K:L:M:N:O:\u0186:P:Q:R:S:T:U:V:W:X:Y:Z"}, 103 // /* Asu*/ {"asa", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"}, 104 // /* Azerbaijani*/ {"az", "A:B:C:\u00C7:D:E:\u018F:F:G:\u011E:H:X:I:\u0130:J:K:Q:L:M:N:O:\u00D6:P:R:S:\u015E:T:U:\u00DC:V:W:Y:Z"}, 105 // /* Belarusian*/ {"be", "\u0410:\u0411:\u0412:\u0413:\u0414:\u0415:\u0416:\u0417:\u0406:\u0419:\u041A:\u041B:\u041C:\u041D:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u0428:\u042B:\u042D:\u042E:\u042F"}, 106 // /* Bemba*/ {"bem", "A:B:C:E:F:G:I:J:K:L:M:N:O:P:S:T:U:W:Y"}, 107 // /* Bena*/ {"bez", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:Y:Z"}, 108 // /* Bambara*/ {"bm", "A:B:C:D:E:\u0190:F:G:H:I:J:K:L:M:N:\u019D:\u014A:O:\u0186:P:R:S:T:U:W:Y:Z"}, 109 // /* Tibetan*/ {"bo", "\u0F40:\u0F41:\u0F42:\u0F44:\u0F45:\u0F46:\u0F47:\u0F49:\u0F4F:\u0F50:\u0F51:\u0F53:\u0F54:\u0F55:\u0F56:\u0F58:\u0F59:\u0F5A:\u0F5B:\u0F5D:\u0F5E:\u0F5F:\u0F60:\u0F61:\u0F62:\u0F63:\u0F64:\u0F66:\u0F67:\u0F68"}, 110 // /* Chiga*/ {"cgg", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 111 // /* Cherokee*/ {"chr", "\u13A0:\u13A6:\u13AD:\u13B3:\u13B9:\u13BE:\u13C6:\u13CC:\u13D3:\u13DC:\u13E3:\u13E9:\u13EF"}, 112 // /* Welsh*/ {"cy", "A:B:C:CH:D:E:F:FF:G:H:I:J:L:LL:M:N:O:P:PH:R:RH:S:T:TH:U:W:Y"}, 113 // /* Taita*/ {"dav", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"}, 114 // /* Embu*/ {"ebu", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 115 // /* Ewe*/ {"ee", "A:B:C:D:\u0189:E:\u0190:F:\u0191:G:\u0194:H:I:J:K:L:M:N:\u014A:O:\u0186:P:Q:R:S:T:U:V:\u01B2:W:X:Y:Z"}, 116 // /* Esperanto*/ {"eo", "A:B:C:\u0108:D:E:F:G:\u011C:H:\u0124:I:J:\u0134:K:L:M:N:O:P:R:S:\u015C:T:U:\u016C:V:Z"}, 117 // /* Fulah*/ {"ff", "A:B:\u0181:C:D:\u018A:E:F:G:H:I:J:K:L:M:N:\u014A:O:P:R:S:T:U:W:Y:\u01B3"}, 118 // /* Faroese*/ {"fo", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C6:\u00D8"}, 119 // /* Gusii*/ {"guz", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"}, 120 // /* Hausa*/ {"ha", "A:B:\u0181:C:D:\u018A:E:F:G:H:I:J:K:\u0198:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 121 // /* Igbo*/ {"ig", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 122 // /* Machame*/ {"jmc", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"}, 123 // /* Kabyle*/ {"kab", "A:B:C:D:E:\u0190:F:G:\u0194:H:I:J:K:L:M:N:P:Q:R:S:T:U:W:X:Y:Z"}, 124 // /* Kamba*/ {"kam", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 125 // /* Makonde*/ {"kde", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 126 // /* Kabuverdianu*/ {"kea", "A:B:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:X:Z"}, 127 // /* Koyra Chiini*/ {"khq", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:\u019D:\u014A:O:P:Q:R:S:T:U:W:X:Y:Z"}, 128 // /* Kikuyu*/ {"ki", "A:B:C:D:E:G:H:I:J:K:M:N:O:R:T:U:W:Y"}, 129 // /* Kalenjin*/ {"kln", "A:B:C:D:E:G:H:I:J:K:L:M:N:O:P:R:S:T:U:W:Y"}, 130 // /* Langi*/ {"lag", "A:B:C:D:E:F:G:H:I:\u0197:J:K:L:M:N:O:P:Q:R:S:T:U:\u0244:V:W:X:Y:Z"}, 131 // /* Ganda*/ {"lg", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 132 // /* Luo*/ {"luo", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y"}, 133 // /* Luyia*/ {"luy", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 134 // /* Masai*/ {"mas", "A:B:C:D:E:\u0190:G:H:I:\u0197:J:K:L:M:N:\u014A:O:\u0186:P:R:S:T:U:\u0244:W:Y"}, 135 // /* Meru*/ {"mer", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 136 // /* Morisyen*/ {"mfe", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:X:Y:Z"}, 137 // /* Malagasy*/ {"mg", "A:B:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:V:Y:Z"}, 138 // This should be the correct data. Commented till it is fixed in CLDR collation data. 139 // {"mk", "\u0410:\u0411:\u0412:\u0413:\u0403:\u0414:\u0415:\u0416:\u0417:\u0405:\u0418:\u0408:\u041A:\u040C:\u041B:\u0409:\u041C:\u041D:\u040A:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u040F:\u0428"}, 140 // /* Macedonian*/ {"mk", "\u0410:\u0411:\u0412:\u0413:\u0414:\u0403:\u0415:\u0416:\u0417:\u0405:\u0418:\u0408:\u041A:\u041B:\u0409:\u041C:\u041D:\u040A:\u041E:\u041F:\u0420:\u0421:\u0422:\u040C:\u0423:\u0424:\u0425:\u0426:\u0427:\u040F:\u0428"}, 141 // This should be the correct data. Commented till it is fixed in CLDR collation data. 142 // {"mt", "A:B:C:\u010A:D:E:F:\u0120:G:G\u0126:H:\u0126:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:\u017B:Z"}, 143 // /* Maltese*/ {"mt", "A:B:\u010A:C:D:E:F:\u0120:G:G\u0126:H:\u0126:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:\u017B:Z"}, 144 // /* Nama*/ {"naq", "A:B:C:D:E:F:G:H:I:K:M:N:O:P:Q:R:S:T:U:W:X:Y:Z"}, 145 // /* North Ndebele*/ {"nd", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:S:T:U:V:W:X:Y:Z"}, 146 // /* Norwegian Nynorsk*/ {"nn", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C6:\u00D8:\u00C5"}, 147 // /* Nyankole*/ {"nyn", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 148 // /* Oromo*/ {"om", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 149 // /* Romansh*/ {"rm", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 150 // /* Rombo*/ {"rof", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"}, 151 // /* Kinyarwanda*/ {"rw", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 152 // /* Rwa*/ {"rwk", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"}, 153 // /* Samburu*/ {"saq", "A:B:C:D:E:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y"}, 154 // /* Sena*/ {"seh", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 155 // /* Koyraboro Senni*/ {"ses", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:\u019D:\u014A:O:P:Q:R:S:T:U:W:X:Y:Z"}, 156 // /* Sango*/ {"sg", "A:B:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"}, 157 // /* Tachelhit*/ {"shi", "A:B:C:D:E:\u0190:F:G:\u0194:H:I:J:K:L:M:N:Q:R:S:T:U:W:X:Y:Z"}, 158 // /* Tachelhit (Tifinagh)*/ {"shi_Tfng", "\u2D30:\u2D31:\u2D33:\u2D37:\u2D39:\u2D3B:\u2D3C:\u2D3D:\u2D40:\u2D43:\u2D44:\u2D45:\u2D47:\u2D49:\u2D4A:\u2D4D:\u2D4E:\u2D4F:\u2D53:\u2D54:\u2D55:\u2D56:\u2D59:\u2D5A:\u2D5B:\u2D5C:\u2D5F:\u2D61:\u2D62:\u2D63:\u2D65"}, 159 // /* Shona*/ {"sn", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"}, 160 // /* Teso*/ {"teo", "A:B:C:D:E:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:X:Y"}, 161 // /* Tonga*/ {"to", "A:B:C:D:E:F:G:H:\u02BB:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 162 // /* Central Morocco Tamazight*/ {"tzm", "A:B:C:D:E:\u0190:F:G:\u0194:H:I:J:K:L:M:N:Q:R:S:T:U:W:X:Y:Z"}, 163 // /* Uzbek (Latin)*/ {"uz_Latn", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u02BF"}, 164 // /* Vunjo*/ {"vun", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"}, 165 // /* Soga*/ {"xog", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 166 // /* Yoruba*/ {"yo", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 167 168 }; 169 170// public void TestAAKeyword() { 171// ICUResourceBundle rb = (ICUResourceBundle) UResourceBundle.getBundleInstance( 172// ICUResourceBundle.ICU_COLLATION_BASE_NAME, "zh"); 173// showBundle(rb, 0); 174// String[] keywords = Collator.getKeywords(); 175// System.out.println(Arrays.asList(keywords)); 176// String locale = "zh"; 177// ULocale ulocale = new ULocale(locale); 178// for (String keyword : keywords) { 179// List<String> values = Arrays.asList(Collator.getKeywordValuesForLocale(keyword, ulocale, false)); 180// List<String> allValues = Arrays.asList(Collator.getKeywordValues(keyword)); 181// for (String value : allValues) { 182// System.out.println(keyword + "=" + value); 183// checkKeyword(locale, value, values.contains(value)); 184// } 185// } 186// } 187// 188// private void checkKeyword(String locale, String collationValue, boolean shouldExist) { 189// final ULocale base = new ULocale(locale); 190// final ULocale desired = new ULocale(locale + "@collation=" + collationValue); 191// Collator foo = Collator.getInstance(desired); 192// ULocale actual = foo.getLocale(ULocale.ACTUAL_LOCALE); 193// if (shouldExist) { 194// assertEquals("actual should match desired", desired, actual); 195// } else { 196// assertEquals("actual should match base", base, actual); 197// } 198// int comp = foo.compare("a", "ā"); 199// assertEquals("should fall back to default for zh", -1, comp); 200// } 201// 202// /** 203// * @param rb 204// * @param i 205// */ 206// private static void showBundle(UResourceBundle rb, int i) { 207// for (String key : rb.keySet()) { 208// System.out.print("\n" + Utility.repeat(" ", i) + key); 209// UResourceBundle rb2 = rb.get(key); 210// showBundle(rb2, i+1); 211// } 212// } 213 214 215 @Test 216 public void TestA() { 217 String[][] tests = {{"zh_Hant", "渡辺", "12劃"}, 218 {"zh", "渡辺", "D"} 219 /*, "zh@collation=unihan", "ja@collation=unihan", "ko@collation=unihan"*/ 220 }; 221 for (String[] test : tests) { 222 AlphabeticIndex<Integer> alphabeticIndex = new AlphabeticIndex<Integer>(new ULocale(test[0])); 223 final String probe = test[1]; 224 final String expectedLabel = test[2]; 225 alphabeticIndex.addRecord(probe, 1); 226 List labels = alphabeticIndex.getBucketLabels(); 227 logln(labels.toString()); 228 Bucket<Integer> bucket = find(alphabeticIndex, probe); 229 assertEquals("locale " + test[0] + " name=" + probe + " in bucket", 230 expectedLabel, bucket.getLabel()); 231 } 232 } 233 234 private Bucket<Integer> find(AlphabeticIndex<Integer> alphabeticIndex, final String probe) { 235 for (Bucket<Integer> bucket : alphabeticIndex) { 236 for (Record<Integer> record : bucket) { 237 if (record.getName().equals(probe)) { 238 return bucket; 239 } 240 } 241 } 242 return null; 243 } 244 245 @Test 246 public void TestFirstCharacters() { 247 248 AlphabeticIndex alphabeticIndex = new AlphabeticIndex(Locale.ENGLISH); 249 RuleBasedCollator collator = alphabeticIndex.getCollator(); 250 collator.setStrength(Collator.IDENTICAL); 251 Collection<String> firsts = alphabeticIndex.getFirstCharactersInScripts(); 252 // Verify that each script is represented exactly once. 253 // Exclude pseudo-scripts like Common (no letters). 254 // Exclude scripts like Braille and Sutton SignWriting 255 // because they only have symbols, not letters. 256 UnicodeSet missingScripts = new UnicodeSet( 257 "[^[:inherited:][:unknown:][:common:][:Braille:][:SignWriting:]]"); 258 String last = ""; 259 for (String index : firsts) { 260 if (collator.compare(last,index) >= 0) { 261 errln("Characters not in order: " + last + " !< " + index); 262 } 263 int script = getFirstRealScript(index); 264 if (script == UScript.UNKNOWN) { continue; } 265 UnicodeSet s = new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, script); 266 if (missingScripts.containsNone(s)) { 267 errln("2nd character in script: " + index + "\t" + new UnicodeSet(missingScripts).retainAll(s).toPattern(false)); 268 } 269 missingScripts.removeAll(s); 270 } 271 if (missingScripts.size() != 0) { 272 String missingScriptNames = ""; 273 UnicodeSet missingChars = new UnicodeSet(missingScripts); 274 for(;;) { 275 int c = missingChars.charAt(0); 276 if (c < 0) { 277 break; 278 } 279 int script = UScript.getScript(c); 280 missingScriptNames += " " + 281 UCharacter.getPropertyValueName( 282 UProperty.SCRIPT, script, UProperty.NameChoice.SHORT); 283 missingChars.removeAll(new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, script)); 284 } 285 errln("Missing character from:" + missingScriptNames + " -- " + missingScripts); 286 } 287 } 288 289 private static final int getFirstRealScript(CharSequence s) { 290 for (int i = 0; i < s.length();) { 291 int c = Character.codePointAt(s, i); 292 int script = UScript.getScript(c); 293 if (script != UScript.UNKNOWN && script != UScript.INHERITED && script != UScript.COMMON) { 294 return script; 295 } 296 i += Character.charCount(c); 297 } 298 return UScript.UNKNOWN; 299 } 300 301 @Test 302 public void TestBuckets() { 303 ULocale additionalLocale = ULocale.ENGLISH; 304 305 for (String[] pair : localeAndIndexCharactersLists) { 306 checkBuckets(pair[0], SimpleTests, additionalLocale, "E", "edgar", "Effron", "Effron"); 307 } 308 } 309 310 @Test 311 public void TestEmpty() { 312 // just verify that it doesn't blow up. 313 Set<ULocale> locales = new LinkedHashSet<ULocale>(); 314 locales.add(ULocale.ROOT); 315 locales.addAll(Arrays.asList(ULocale.getAvailableLocales())); 316 for (ULocale locale : locales) { 317 try { 318 AlphabeticIndex<String> alphabeticIndex = new AlphabeticIndex(locale); 319 alphabeticIndex.addRecord("hi", "HI"); 320 for (Bucket<String> bucket : alphabeticIndex) { 321 @SuppressWarnings("unused") 322 LabelType labelType = bucket.getLabelType(); 323 } 324 } catch (Exception e) { 325 errln("Exception when creating AlphabeticIndex for:\t" + locale.toLanguageTag()); 326 errln(e.toString()); 327 } 328 } 329 } 330 331 @Test 332 public void TestSetGetSpecialLabels() { 333 AlphabeticIndex index = new AlphabeticIndex(Locale.GERMAN).addLabels(new Locale("ru")); 334 index.setUnderflowLabel("__"); 335 index.setInflowLabel("--"); 336 index.setOverflowLabel("^^"); 337 assertEquals("underflow label", "__", index.getUnderflowLabel()); 338 assertEquals("inflow label", "--", index.getInflowLabel()); 339 assertEquals("overflow label", "^^", index.getOverflowLabel()); 340 341 ImmutableIndex ii = index.buildImmutableIndex(); 342 assertEquals("0 -> underflow", "__", ii.getBucket(ii.getBucketIndex("0")).getLabel()); 343 assertEquals("Ω -> inflow", "--", ii.getBucket(ii.getBucketIndex("Ω")).getLabel()); 344 assertEquals("字 -> overflow", "^^", ii.getBucket(ii.getBucketIndex("字")).getLabel()); 345 } 346 347 @Test 348 public void TestInflow() { 349 Object[][] tests = { 350 {0, ULocale.ENGLISH}, 351 {0, ULocale.ENGLISH, new ULocale("el")}, 352 {1, ULocale.ENGLISH, new ULocale("ru")}, 353 {0, ULocale.ENGLISH, new ULocale("el"), new UnicodeSet("[\u2C80]"), new ULocale("ru")}, 354 {0, ULocale.ENGLISH}, 355 {2, ULocale.ENGLISH, new ULocale("ru"), ULocale.JAPANESE}, 356 }; 357 for (Object[] test : tests) { 358 int expected = (Integer) test[0]; 359 AlphabeticIndex<Double> alphabeticIndex = new AlphabeticIndex((ULocale)test[1]); 360 for (int i = 2; i < test.length; ++i) { 361 if (test[i] instanceof ULocale) { 362 alphabeticIndex.addLabels((ULocale)test[i]); 363 } else { 364 alphabeticIndex.addLabels((UnicodeSet)test[i]); 365 } 366 } 367 Counter<AlphabeticIndex.Bucket.LabelType> counter = new Counter(); 368 for (Bucket<Double> bucket : alphabeticIndex) { 369 LabelType labelType = bucket.getLabelType(); 370 counter.add(labelType, 1); 371 } 372 String printList = Arrays.asList(test).toString(); 373 assertEquals(LabelType.UNDERFLOW + "\t" + printList, 1, counter.get(LabelType.UNDERFLOW)); 374 assertEquals(LabelType.INFLOW + "\t" + printList, expected, counter.get(LabelType.INFLOW)); 375 if (expected != counter.get(LabelType.INFLOW)) { 376 // for debugging 377 AlphabeticIndex<Double> indexCharacters2 = new AlphabeticIndex((ULocale)test[1]); 378 for (int i = 2; i < test.length; ++i) { 379 if (test[i] instanceof ULocale) { 380 indexCharacters2.addLabels((ULocale)test[i]); 381 } else { 382 indexCharacters2.addLabels((UnicodeSet)test[i]); 383 } 384 } 385 List<Bucket<Double>> buckets = CollectionUtilities.addAll(alphabeticIndex.iterator(), new ArrayList<Bucket<Double>>()); 386 logln(buckets.toString()); 387 } 388 assertEquals(LabelType.OVERFLOW + "\t" + printList, 1, counter.get(LabelType.OVERFLOW)); 389 } 390 } 391 392 private void checkBuckets(String localeString, String[] test, ULocale additionalLocale, String testBucket, String... items) { 393 StringBuilder UI = new StringBuilder(); 394 ULocale desiredLocale = new ULocale(localeString); 395 396 // Create a simple index where the values for the strings are Integers, and add the strings 397 AlphabeticIndex<Integer> index = new AlphabeticIndex<Integer>(desiredLocale).addLabels(additionalLocale); 398 int counter = 0; 399 Counter<String> itemCount = new Counter(); 400 for (String item : test) { 401 index.addRecord(item, counter++); 402 itemCount.add(item, 1); 403 } 404 assertEquals("getRecordCount()", (int)itemCount.getTotal(), index.getRecordCount()); // code coverage 405 406 List<String> labels = index.getBucketLabels(); 407 ImmutableIndex<Integer> immIndex = index.buildImmutableIndex(); 408 409 logln(desiredLocale + "\t" + desiredLocale.getDisplayName(ULocale.ENGLISH) + " - " + desiredLocale.getDisplayName(desiredLocale) + "\t" 410 + index.getCollator().getLocale(ULocale.ACTUAL_LOCALE)); 411 UI.setLength(0); 412 UI.append(desiredLocale + "\t"); 413 boolean showAll = true; 414 415 // Show index at top. We could skip or gray out empty buckets 416 for (AlphabeticIndex.Bucket<Integer> bucket : index) { 417 if (showAll || bucket.size() != 0) { 418 showLabelAtTop(UI, bucket.getLabel()); 419 } 420 } 421 logln(UI.toString()); 422 423 // Show the buckets with their contents, skipping empty buckets 424 int bucketIndex = 0; 425 for (Bucket<Integer> bucket : index) { 426 assertEquals("bucket label vs. iterator", 427 labels.get(bucketIndex), bucket.getLabel()); 428 assertEquals("bucket label vs. immutable", 429 labels.get(bucketIndex), immIndex.getBucket(bucketIndex).getLabel()); 430 assertEquals("bucket label type vs. immutable", 431 bucket.getLabelType(), immIndex.getBucket(bucketIndex).getLabelType()); 432 for (Record<Integer> r : bucket) { 433 CharSequence name = r.getName(); 434 assertEquals("getBucketIndex(" + name + ")", 435 bucketIndex, index.getBucketIndex(name)); 436 assertEquals("immutable getBucketIndex(" + name + ")", 437 bucketIndex, immIndex.getBucketIndex(name)); 438 } 439 if (bucket.getLabel().equals(testBucket)) { 440 Counter<String> keys = getKeys(bucket); 441 for (String item : items) { 442 long globalCount = itemCount.get(item); 443 long localeCount = keys.get(item); 444 if (globalCount != localeCount) { 445 errln("Error: in " + "'" + testBucket + "', '" + item + "' should have count " 446 + globalCount + " but has count " + localeCount); 447 } 448 449 } 450 } 451 452 if (bucket.size() != 0) { 453 showLabelInList(UI, bucket.getLabel()); 454 for (AlphabeticIndex.Record<Integer> item : bucket) { 455 showIndexedItem(UI, item.getName(), item.getData()); 456 } 457 logln(UI.toString()); 458 } 459 ++bucketIndex; 460 } 461 assertEquals("getBucketCount()", bucketIndex, index.getBucketCount()); 462 assertEquals("immutable getBucketCount()", bucketIndex, immIndex.getBucketCount()); 463 464 assertNull("immutable getBucket(-1)", immIndex.getBucket(-1)); 465 assertNull("immutable getBucket(count)", immIndex.getBucket(bucketIndex)); 466 467 for (Bucket<Integer> bucket : immIndex) { 468 assertEquals("immutable bucket size", 0, bucket.size()); 469 assertFalse("immutable bucket iterator.hasNext()", bucket.iterator().hasNext()); 470 } 471 } 472 473 public <T> void showIndex(AlphabeticIndex<T> index, boolean showEmpty) { 474 logln("Actual"); 475 StringBuilder UI = new StringBuilder(); 476 for (Bucket<T> bucket : index) { 477 if (showEmpty || bucket.size() != 0) { 478 showLabelInList(UI, bucket.getLabel()); 479 for (Record<T> item : bucket) { 480 showIndexedItem(UI, item.getName(), item.getData()); 481 } 482 logln(UI.toString()); 483 } 484 } 485 } 486 487 /** 488 * @param myBucketLabels 489 * @param myBucketContents 490 * @param b 491 */ 492 private void showIndex(List<String> myBucketLabels, ArrayList<Set<R4<RawCollationKey, String, Integer, Double>>> myBucketContents, boolean showEmpty) { 493 logln("Alternative"); 494 StringBuilder UI = new StringBuilder(); 495 496 for (int i = 0; i < myBucketLabels.size(); ++i) { 497 Set<R4<RawCollationKey, String, Integer, Double>> bucket = myBucketContents.get(i); 498 if (!showEmpty && bucket.size() == 0) { 499 continue; 500 } 501 UI.setLength(0); 502 UI.append("*").append(myBucketLabels.get(i)); 503 for (R4<RawCollationKey, String, Integer, Double> item : bucket) { 504 UI.append("\t ").append(item.get1().toString()).append(ARROW).append(item.get3().toString()); 505 } 506 logln(UI.toString()); 507 } 508 } 509 510 private void showLabelAtTop(StringBuilder buffer, String label) { 511 buffer.append(label + " "); 512 } 513 514 private <T> void showIndexedItem(StringBuilder buffer, CharSequence key, T value) { 515 buffer.append("\t " + key + ARROW + value); 516 } 517 518 private void showLabelInList(StringBuilder buffer, String label) { 519 buffer.setLength(0); 520 buffer.append(label); 521 } 522 523 private Counter<String> getKeys(AlphabeticIndex.Bucket<Integer> entry) { 524 Counter<String> keys = new Counter<String>(); 525 for (AlphabeticIndex.Record x : entry) { 526 String key = x.getName().toString(); 527 keys.add(key, 1); 528 } 529 return keys; 530 } 531 532 @Test 533 public void TestIndexCharactersList() { 534 for (String[] localeAndIndexCharacters : localeAndIndexCharactersLists) { 535 ULocale locale = new ULocale(localeAndIndexCharacters[0]); 536 String expectedIndexCharacters = "\u2026:" + localeAndIndexCharacters[1] + ":\u2026"; 537 Collection<String> alphabeticIndex = new AlphabeticIndex(locale).getBucketLabels(); 538 539 // Join the elements of the list to a string with delimiter ":" 540 StringBuilder sb = new StringBuilder(); 541 Iterator<String> iter = alphabeticIndex.iterator(); 542 while (iter.hasNext()) { 543 sb.append(iter.next()); 544 if (!iter.hasNext()) { 545 break; 546 } 547 sb.append(":"); 548 } 549 String actualIndexCharacters = sb.toString(); 550 if (!expectedIndexCharacters.equals(actualIndexCharacters)) { 551 errln("Test failed for locale " + localeAndIndexCharacters[0] + 552 "\n Expected = |" + expectedIndexCharacters + "|\n actual = |" + actualIndexCharacters + "|"); 553 } 554 } 555 } 556 557 @Test 558 public void TestBasics() { 559 ULocale[] list = ULocale.getAvailableLocales(); 560 // get keywords combinations 561 // don't bother with multiple combinations at this point 562 List keywords = new ArrayList(); 563 keywords.add(""); 564 565 String[] collationValues = Collator.getKeywordValues("collation"); 566 for (int j = 0; j < collationValues.length; ++j) { 567 keywords.add("@collation=" + collationValues[j]); 568 } 569 570 for (int i = 0; i < list.length; ++i) { 571 for (Iterator it = keywords.iterator(); it.hasNext();) { 572 String collationValue = (String) it.next(); 573 String localeString = list[i].toString(); 574 if (!KEY_LOCALES.contains(localeString)) continue; // TODO change in exhaustive 575 ULocale locale = new ULocale(localeString + collationValue); 576 if (collationValue.length() > 0 && !Collator.getFunctionalEquivalent("collation", locale).equals(locale)) { 577 //logln("Skipping " + locale); 578 continue; 579 } 580 581 if (locale.getCountry().length() != 0) { 582 continue; 583 } 584 boolean isUnihan = collationValue.contains("unihan"); 585 AlphabeticIndex alphabeticIndex = new AlphabeticIndex(locale); 586 if (isUnihan) { 587 // Unihan tailorings have a label per radical, and there are at least 214, 588 // if not more when simplified radicals are distinguished. 589 alphabeticIndex.setMaxLabelCount(500); 590 } 591 final Collection mainChars = alphabeticIndex.getBucketLabels(); 592 String mainCharString = mainChars.toString(); 593 if (mainCharString.length() > 500) { 594 mainCharString = mainCharString.substring(0,500) + "..."; 595 } 596 logln(mainChars.size() + "\t" + locale + "\t" + locale.getDisplayName(ULocale.ENGLISH)); 597 logln("Index:\t" + mainCharString); 598 if (!isUnihan && mainChars.size() > 100) { 599 errln("Index character set too large: " + 600 locale + " [" + mainChars.size() + "]:\n " + mainChars); 601 } 602 } 603 } 604 } 605 606 @Test 607 public void TestClientSupport() { 608 for (String localeString : new String[] {"zh"}) { // KEY_LOCALES, new String[] {"zh"} 609 ULocale ulocale = new ULocale(localeString); 610 AlphabeticIndex<Double> alphabeticIndex = new AlphabeticIndex<Double>(ulocale).addLabels(Locale.ENGLISH); 611 RuleBasedCollator collator = alphabeticIndex.getCollator(); 612 String [][] tests; 613 614 if (!localeString.equals("zh") ) { 615 tests = new String[][] {SimpleTests}; 616 } else { 617 tests = new String[][] {SimpleTests, hackPinyin, simplifiedNames}; 618 } 619 620 for (String [] shortTest : tests) { 621 double testValue = 100; 622 alphabeticIndex.clearRecords(); 623 for (String name : shortTest) { 624 alphabeticIndex.addRecord(name, testValue++); 625 } 626 627 if (DEBUG) showIndex(alphabeticIndex, false); 628 629 // make my own copy 630 testValue = 100; 631 List<String> myBucketLabels = alphabeticIndex.getBucketLabels(); 632 ArrayList<Set<R4<RawCollationKey, String, Integer, Double>>> myBucketContents = new ArrayList<Set<R4<RawCollationKey, String, Integer, Double>>>(myBucketLabels.size()); 633 for (int i = 0; i < myBucketLabels.size(); ++i) { 634 myBucketContents.add(new TreeSet<R4<RawCollationKey, String, Integer, Double>>()); 635 } 636 for (String name : shortTest) { 637 int bucketIndex = alphabeticIndex.getBucketIndex(name); 638 if (bucketIndex > myBucketContents.size()) { 639 alphabeticIndex.getBucketIndex(name); // call again for debugging 640 } 641 Set<R4<RawCollationKey, String, Integer, Double>> myBucket = myBucketContents.get(bucketIndex); 642 RawCollationKey rawCollationKey = collator.getRawCollationKey(name, null); 643 R4<RawCollationKey, String, Integer, Double> row = Row.of(rawCollationKey, name, name.length(), testValue++); 644 myBucket.add(row); 645 } 646 if (DEBUG) showIndex(myBucketLabels, myBucketContents, false); 647 648 // now compare 649 int index = 0; 650 boolean gotError = false; 651 for (AlphabeticIndex.Bucket<Double> bucket : alphabeticIndex) { 652 String bucketLabel = bucket.getLabel(); 653 String myLabel = myBucketLabels.get(index); 654 if (!bucketLabel.equals(myLabel)) { 655 gotError |= !assertEquals(ulocale + "\tBucket Labels (" + index + ")", bucketLabel, myLabel); 656 } 657 Set<R4<RawCollationKey, String, Integer, Double>> myBucket = myBucketContents.get(index); 658 Iterator<R4<RawCollationKey, String, Integer, Double>> myBucketIterator = myBucket.iterator(); 659 int recordIndex = 0; 660 for (Record<Double> record : bucket) { 661 String myName = null; 662 if (myBucketIterator.hasNext()) { 663 R4<RawCollationKey, String, Integer, Double> myRecord = myBucketIterator.next(); 664 myName = myRecord.get1(); 665 } 666 if (!record.getName().equals(myName)) { 667 gotError |= !assertEquals(ulocale + "\t" + bucketLabel + "\t" + "Record Names (" + index + "." + recordIndex++ + ")", record.getName(), myName); 668 } 669 } 670 while (myBucketIterator.hasNext()) { 671 R4<RawCollationKey, String, Integer, Double> myRecord = myBucketIterator.next(); 672 String myName = myRecord.get1(); 673 gotError |= !assertEquals(ulocale + "\t" + bucketLabel + "\t" + "Record Names (" + index + "." + recordIndex++ + ")", null, myName); 674 } 675 index++; 676 } 677 if (gotError) { 678 showIndex(myBucketLabels, myBucketContents, false); 679 showIndex(alphabeticIndex, false); 680 } 681 } 682 } 683 } 684 685 @Test 686 public void TestFirstScriptCharacters() { 687 Collection<String> firstCharacters = 688 new AlphabeticIndex(ULocale.ENGLISH).getFirstCharactersInScripts(); 689 Collection<String> expectedFirstCharacters = firstStringsInScript((RuleBasedCollator) Collator.getInstance(ULocale.ROOT)); 690 Collection<String> diff = new TreeSet<String>(firstCharacters); 691 diff.removeAll(expectedFirstCharacters); 692 assertTrue("First Characters contains unexpected ones: " + diff, diff.isEmpty()); 693 diff.clear(); 694 diff.addAll(expectedFirstCharacters); 695 diff.removeAll(firstCharacters); 696 assertTrue("First Characters missing expected ones: " + diff, diff.isEmpty()); 697 } 698 699 private static final UnicodeSet TO_TRY = new UnicodeSet("[[:^nfcqc=no:]-[:sc=Common:]-[:sc=Inherited:]-[:sc=Unknown:]]").freeze(); 700 701 /** 702 * Returns a collection of all the "First" characters of scripts, according to the collation. 703 */ 704 private static Collection<String> firstStringsInScript(RuleBasedCollator ruleBasedCollator) { 705 String[] results = new String[UScript.CODE_LIMIT]; 706 for (String current : TO_TRY) { 707 if (ruleBasedCollator.compare(current, "a") < 0) { // we only want "real" script characters, not symbols. 708 continue; 709 } 710 int script = UScript.getScript(current.codePointAt(0)); 711 if (results[script] == null) { 712 results[script] = current; 713 } else if (ruleBasedCollator.compare(current, results[script]) < 0) { 714 results[script] = current; 715 } 716 } 717 718 try { 719 UnicodeSet extras = new UnicodeSet(); 720 UnicodeSet expansions = new UnicodeSet(); 721 ruleBasedCollator.getContractionsAndExpansions(extras, expansions, true); 722 extras.addAll(expansions).removeAll(TO_TRY); 723 if (extras.size() != 0) { 724 Normalizer2 normalizer = Normalizer2.getNFKCInstance(); 725 for (String current : extras) { 726 if (!normalizer.isNormalized(current) || ruleBasedCollator.compare(current, "9") <= 0) { 727 continue; 728 } 729 int script = getFirstRealScript(current); 730 if (script == UScript.UNKNOWN && !isUnassignedBoundary(current)) { continue; } 731 if (results[script] == null) { 732 results[script] = current; 733 } else if (ruleBasedCollator.compare(current, results[script]) < 0) { 734 results[script] = current; 735 } 736 } 737 } 738 } catch (Exception e) { 739 } // why have a checked exception??? 740 741 // TODO: We should not test that we get the same strings, but that we 742 // get strings that sort primary-equal to those from the implementation. 743 744 Collection<String> result = new ArrayList<String>(); 745 for (int i = 0; i < results.length; ++i) { 746 if (results[i] != null) { 747 result.add(results[i]); 748 } 749 } 750 return result; 751 } 752 753 private static final boolean isUnassignedBoundary(CharSequence s) { 754 // The root collator provides a script-first-primary boundary contraction 755 // for the unassigned-implicit range. 756 return s.charAt(0) == 0xfdd1 && 757 UScript.getScript(Character.codePointAt(s, 1)) == UScript.UNKNOWN; 758 } 759 760 @Test 761 public void TestZZZ() { 762 // int x = 3; 763 // AlphabeticIndex index = new AlphabeticIndex(ULocale.ENGLISH); 764 // UnicodeSet additions = new UnicodeSet(); 765 // additions.add(0x410).add(0x415); // Cyrillic 766 // // additions.add(0x391).add(0x393); // Greek 767 // index.addLabels(additions); 768 // int lc = index.getLabels().size(); 769 // List labels = index.getLabels(); 770 // System.out.println("Label Count = " + lc + "\t" + labels); 771 // System.out.println("Bucket Count =" + index.getBucketCount()); 772 } 773 774 @Test 775 public void TestSimplified() { 776 checkBuckets("zh", simplifiedNames, ULocale.ENGLISH, "W", "\u897f"); 777 } 778 779 @Test 780 public void TestTraditional() { 781 checkBuckets("zh_Hant", traditionalNames, ULocale.ENGLISH, "\u4e9f", "\u5357\u9580"); 782 } 783 784 static final String[] SimpleTests = { 785 "斎藤", 786 "\u1f2d\u03c1\u03b1", 787 "$", "\u00a3", "12", "2", 788 "Davis", "Davis", "Abbot", "\u1D05avis", "Zach", "\u1D05avis", "\u01b5", "\u0130stanbul", "Istanbul", "istanbul", "\u0131stanbul", 789 "\u00deor", "\u00c5berg", "\u00d6stlund", 790 "\u1f2d\u03c1\u03b1", "\u1f08\u03b8\u03b7\u03bd\u1fb6", 791 "\u0396\u03b5\u03cd\u03c2", "\u03a0\u03bf\u03c3\u03b5\u03b9\u03b4\u1f63\u03bd", "\u1f0d\u03b9\u03b4\u03b7\u03c2", "\u0394\u03b7\u03bc\u03ae\u03c4\u03b7\u03c1", "\u1f19\u03c3\u03c4\u03b9\u03ac", 792 //"\u1f08\u03c0\u03cc\u03bb\u03bb\u03c9\u03bd", "\u1f0c\u03c1\u03c4\u03b5\u03bc\u03b9\u03c2", "\u1f19\u03c1\u03bc\u1f23\u03c2", "\u1f0c\u03c1\u03b7\u03c2", "\u1f08\u03c6\u03c1\u03bf\u03b4\u03af\u03c4\u03b7", "\u1f2d\u03c6\u03b1\u03b9\u03c3\u03c4\u03bf\u03c2", "\u0394\u03b9\u03cc\u03bd\u03c5\u03c3\u03bf\u03c2", 793 "\u6589\u85e4", "\u4f50\u85e4", "\u9234\u6728", "\u9ad8\u6a4b", "\u7530\u4e2d", "\u6e21\u8fba", "\u4f0a\u85e4", "\u5c71\u672c", "\u4e2d\u6751", "\u5c0f\u6797", "\u658e\u85e4", "\u52a0\u85e4", 794 //"\u5409\u7530", "\u5c71\u7530", "\u4f50\u3005\u6728", "\u5c71\u53e3", "\u677e\u672c", "\u4e95\u4e0a", "\u6728\u6751", "\u6797", "\u6e05\u6c34" 795 }; 796 797 static final String[] hackPinyin = { 798 "a", "\u5416", "\u58ba", // 799 "b", "\u516b", "\u62d4", "\u8500", // 800 "c", "\u5693", "\u7938", "\u9e7e", // 801 "d", "\u5491", "\u8fcf", "\u964a", // 802 "e","\u59b8", "\u92e8", "\u834b", // 803 "f", "\u53d1", "\u9197", "\u99a5", // 804 "g", "\u7324", "\u91d3", "\u8142", // 805 "h", "\u598e", "\u927f", "\u593b", // 806 "j", "\u4e0c", "\u6785", "\u9d58", // 807 "k", "\u5494", "\u958b", "\u7a52", // 808 "l", "\u5783", "\u62c9", "\u9ba5", // 809 "m", "\u5638", "\u9ebb", "\u65c0", // 810 "n", "\u62ff", "\u80ad", "\u685b", // 811 "o", "\u5662", "\u6bee", "\u8bb4", // 812 "p", "\u5991", "\u8019", "\u8c31", // 813 "q", "\u4e03", "\u6053", "\u7f56", // 814 "r", "\u5465", "\u72aa", "\u6e03", // 815 "s", "\u4ee8", "\u9491", "\u93c1", // 816 "t", "\u4ed6", "\u9248", "\u67dd", // 817 "w", "\u5c72", "\u5558", "\u5a7a", // 818 "x", "\u5915", "\u5438", "\u6bbe", // 819 "y", "\u4e2b", "\u82bd", "\u8574", // 820 "z", "\u5e00", "\u707d", "\u5c0a" 821 }; 822 823 static final String[] simplifiedNames = { 824 "Abbot", "Morton", "Zachary", "Williams", "\u8d75", "\u94b1", "\u5b59", "\u674e", "\u5468", "\u5434", "\u90d1", "\u738b", "\u51af", "\u9648", "\u696e", "\u536b", "\u848b", "\u6c88", 825 "\u97e9", "\u6768", "\u6731", "\u79e6", "\u5c24", "\u8bb8", "\u4f55", "\u5415", "\u65bd", "\u5f20", "\u5b54", "\u66f9", "\u4e25", "\u534e", "\u91d1", "\u9b4f", "\u9676", "\u59dc", "\u621a", "\u8c22", "\u90b9", 826 "\u55bb", "\u67cf", "\u6c34", "\u7aa6", "\u7ae0", "\u4e91", "\u82cf", "\u6f58", "\u845b", "\u595a", "\u8303", "\u5f6d", "\u90ce", "\u9c81", "\u97e6", "\u660c", "\u9a6c", "\u82d7", "\u51e4", "\u82b1", "\u65b9", 827 "\u4fde", "\u4efb", "\u8881", "\u67f3", "\u9146", "\u9c8d", "\u53f2", "\u5510", "\u8d39", "\u5ec9", "\u5c91", "\u859b", "\u96f7", "\u8d3a", "\u502a", "\u6c64", "\u6ed5", "\u6bb7", "\u7f57", "\u6bd5", "\u90dd", 828 "\u90ac", "\u5b89", "\u5e38", "\u4e50", "\u4e8e", "\u65f6", "\u5085", "\u76ae", "\u535e", "\u9f50", "\u5eb7", "\u4f0d", "\u4f59", "\u5143", "\u535c", "\u987e", "\u5b5f", "\u5e73", "\u9ec4", "\u548c", "\u7a46", 829 "\u8427", "\u5c39", "\u59da", "\u90b5", "\u6e5b", "\u6c6a", "\u7941", "\u6bdb", "\u79b9", "\u72c4", "\u7c73", "\u8d1d", "\u660e", "\u81e7", "\u8ba1", "\u4f0f", "\u6210", "\u6234", "\u8c08", "\u5b8b", "\u8305", 830 "\u5e9e", "\u718a", "\u7eaa", "\u8212", "\u5c48", "\u9879", "\u795d", "\u8463", "\u6881", "\u675c", "\u962e", "\u84dd", "\u95fd", "\u5e2d", "\u5b63", "\u9ebb", "\u5f3a", "\u8d3e", "\u8def", "\u5a04", "\u5371", 831 "\u6c5f", "\u7ae5", "\u989c", "\u90ed", "\u6885", "\u76db", "\u6797", "\u5201", "\u953a", "\u5f90", "\u4e18", "\u9a86", "\u9ad8", "\u590f", "\u8521", "\u7530", "\u6a0a", "\u80e1", "\u51cc", "\u970d", "\u865e", 832 "\u4e07", "\u652f", "\u67ef", "\u661d", "\u7ba1", "\u5362", "\u83ab", "\u7ecf", "\u623f", "\u88d8", "\u7f2a", "\u5e72", "\u89e3", "\u5e94", "\u5b97", "\u4e01", "\u5ba3", "\u8d32", "\u9093", "\u90c1", "\u5355", 833 "\u676d", "\u6d2a", "\u5305", "\u8bf8", "\u5de6", "\u77f3", "\u5d14", "\u5409", "\u94ae", "\u9f9a", "\u7a0b", "\u5d47", "\u90a2", "\u6ed1", "\u88f4", "\u9646", "\u8363", "\u7fc1", "\u8340", "\u7f8a", "\u65bc", 834 "\u60e0", "\u7504", "\u9eb9", "\u5bb6", "\u5c01", "\u82ae", "\u7fbf", "\u50a8", "\u9773", "\u6c72", "\u90b4", "\u7cdc", "\u677e", "\u4e95", "\u6bb5", "\u5bcc", "\u5deb", "\u4e4c", "\u7126", "\u5df4", "\u5f13", 835 "\u7267", "\u9697", "\u5c71", "\u8c37", "\u8f66", "\u4faf", "\u5b93", "\u84ec", "\u5168", "\u90d7", "\u73ed", "\u4ef0", "\u79cb", "\u4ef2", "\u4f0a", "\u5bab", "\u5b81", "\u4ec7", "\u683e", "\u66b4", "\u7518", 836 "\u659c", "\u5389", "\u620e", "\u7956", "\u6b66", "\u7b26", "\u5218", "\u666f", "\u8a79", "\u675f", "\u9f99", "\u53f6", "\u5e78", "\u53f8", "\u97f6", "\u90dc", "\u9ece", "\u84df", "\u8584", "\u5370", "\u5bbf", 837 "\u767d", "\u6000", "\u84b2", "\u90b0", "\u4ece", "\u9102", "\u7d22", "\u54b8", "\u7c4d", "\u8d56", "\u5353", "\u853a", "\u5c60", "\u8499", "\u6c60", "\u4e54", "\u9634", "\u90c1", "\u80e5", "\u80fd", "\u82cd", 838 "\u53cc", "\u95fb", "\u8398", "\u515a", "\u7fdf", "\u8c2d", "\u8d21", "\u52b3", "\u9004", "\u59ec", "\u7533", "\u6276", "\u5835", "\u5189", "\u5bb0", "\u90e6", "\u96cd", "\u90e4", "\u74a9", "\u6851", "\u6842", 839 "\u6fee", "\u725b", "\u5bff", "\u901a", "\u8fb9", "\u6248", "\u71d5", "\u5180", "\u90cf", "\u6d66", "\u5c1a", "\u519c", "\u6e29", "\u522b", "\u5e84", "\u664f", "\u67f4", "\u77bf", "\u960e", "\u5145", "\u6155", 840 "\u8fde", "\u8339", "\u4e60", "\u5ba6", "\u827e", "\u9c7c", "\u5bb9", "\u5411", "\u53e4", "\u6613", "\u614e", "\u6208", "\u5ed6", "\u5ebe", "\u7ec8", "\u66a8", "\u5c45", "\u8861", "\u6b65", "\u90fd", "\u803f", 841 "\u6ee1", "\u5f18", "\u5321", "\u56fd", "\u6587", "\u5bc7", "\u5e7f", "\u7984", "\u9619", "\u4e1c", "\u6b27", "\u6bb3", "\u6c83", "\u5229", "\u851a", "\u8d8a", "\u5914", "\u9686", "\u5e08", "\u5de9", "\u538d", 842 "\u8042", "\u6641", "\u52fe", "\u6556", "\u878d", "\u51b7", "\u8a3e", "\u8f9b", "\u961a", "\u90a3", "\u7b80", "\u9976", "\u7a7a", "\u66fe", "\u6bcb", "\u6c99", "\u4e5c", "\u517b", "\u97a0", "\u987b", "\u4e30", 843 "\u5de2", "\u5173", "\u84af", "\u76f8", "\u67e5", "\u540e", "\u8346", "\u7ea2", "\u6e38", "\u7afa", "\u6743", "\u9011", "\u76d6", "\u76ca", "\u6853", "\u516c", "\u4e07\u4fdf", "\u53f8\u9a6c", "\u4e0a\u5b98", "\u6b27\u9633", 844 "\u590f\u4faf", "\u8bf8\u845b", "\u95fb\u4eba", "\u4e1c\u65b9", "\u8d6b\u8fde", "\u7687\u752b", "\u5c09\u8fdf", "\u516c\u7f8a", "\u6fb9\u53f0", "\u516c\u51b6", "\u5b97\u653f", "\u6fee\u9633", "\u6df3\u4e8e", "\u5355\u4e8e", "\u592a\u53d4", "\u7533\u5c60", "\u516c\u5b59", "\u4ef2\u5b59", 845 "\u8f69\u8f95", "\u4ee4\u72d0", "\u953a\u79bb", "\u5b87\u6587", "\u957f\u5b59", "\u6155\u5bb9", "\u9c9c\u4e8e", "\u95fe\u4e18", "\u53f8\u5f92", "\u53f8\u7a7a", "\u4e0c\u5b98", "\u53f8\u5bc7", "\u4ec9", "\u7763", "\u5b50\u8f66", "\u989b\u5b59", "\u7aef\u6728", "\u5deb\u9a6c", 846 "\u516c\u897f", "\u6f06\u96d5", "\u4e50\u6b63", "\u58e4\u9a77", "\u516c\u826f", "\u62d3\u62d4", "\u5939\u8c37", "\u5bb0\u7236", "\u8c37\u6881", "\u664b", "\u695a", "\u960e", "\u6cd5", "\u6c5d", "\u9122", "\u6d82", "\u94a6", "\u6bb5\u5e72", "\u767e\u91cc", 847 "\u4e1c\u90ed", "\u5357\u95e8", "\u547c\u5ef6", "\u5f52", "\u6d77", "\u7f8a\u820c", "\u5fae\u751f", "\u5cb3", "\u5e05", "\u7f11", "\u4ea2", "\u51b5", "\u540e", "\u6709", "\u7434", "\u6881\u4e18", "\u5de6\u4e18", "\u4e1c\u95e8", "\u897f\u95e8", 848 "\u5546", "\u725f", "\u4f58", "\u4f74", "\u4f2f", "\u8d4f", "\u5357\u5bab", "\u58a8", "\u54c8", "\u8c2f", "\u7b2a", "\u5e74", "\u7231", "\u9633", "\u4f5f" 849 }; 850 851 static final String[] traditionalNames = { "丁", "Abbot", "Morton", "Zachary", "Williams", "\u8d99", "\u9322", "\u5b6b", 852 "\u674e", "\u5468", "\u5433", "\u912d", "\u738b", "\u99ae", "\u9673", "\u696e", "\u885b", "\u8523", 853 "\u6c88", "\u97d3", "\u694a", "\u6731", "\u79e6", "\u5c24", "\u8a31", "\u4f55", "\u5442", "\u65bd", 854 "\u5f35", "\u5b54", "\u66f9", "\u56b4", "\u83ef", "\u91d1", "\u9b4f", "\u9676", "\u59dc", "\u621a", 855 "\u8b1d", "\u9112", "\u55bb", "\u67cf", "\u6c34", "\u7ac7", "\u7ae0", "\u96f2", "\u8607", "\u6f58", 856 "\u845b", "\u595a", "\u7bc4", "\u5f6d", "\u90ce", "\u9b6f", "\u97cb", "\u660c", "\u99ac", "\u82d7", 857 "\u9cf3", "\u82b1", "\u65b9", "\u4fde", "\u4efb", "\u8881", "\u67f3", "\u9146", "\u9b91", "\u53f2", 858 "\u5510", "\u8cbb", "\u5ec9", "\u5c91", "\u859b", "\u96f7", "\u8cc0", "\u502a", "\u6e6f", "\u6ed5", 859 "\u6bb7", "\u7f85", "\u7562", "\u90dd", "\u9114", "\u5b89", "\u5e38", "\u6a02", "\u65bc", "\u6642", 860 "\u5085", "\u76ae", "\u535e", "\u9f4a", "\u5eb7", "\u4f0d", "\u9918", "\u5143", "\u535c", "\u9867", 861 "\u5b5f", "\u5e73", "\u9ec3", "\u548c", "\u7a46", "\u856d", "\u5c39", "\u59da", "\u90b5", "\u6e5b", 862 "\u6c6a", "\u7941", "\u6bdb", "\u79b9", "\u72c4", "\u7c73", "\u8c9d", "\u660e", "\u81e7", "\u8a08", 863 "\u4f0f", "\u6210", "\u6234", "\u8ac7", "\u5b8b", "\u8305", "\u9f90", "\u718a", "\u7d00", "\u8212", 864 "\u5c48", "\u9805", "\u795d", "\u8463", "\u6881", "\u675c", "\u962e", "\u85cd", "\u95a9", "\u5e2d", 865 "\u5b63", "\u9ebb", "\u5f37", "\u8cc8", "\u8def", "\u5a41", "\u5371", "\u6c5f", "\u7ae5", "\u984f", 866 "\u90ed", "\u6885", "\u76db", "\u6797", "\u5201", "\u937e", "\u5f90", "\u4e18", "\u99f1", "\u9ad8", 867 "\u590f", "\u8521", "\u7530", "\u6a0a", "\u80e1", "\u51cc", "\u970d", "\u865e", "\u842c", "\u652f", 868 "\u67ef", "\u661d", "\u7ba1", "\u76e7", "\u83ab", "\u7d93", "\u623f", "\u88d8", "\u7e46", "\u5e79", 869 "\u89e3", "\u61c9", "\u5b97", "\u4e01", "\u5ba3", "\u8cc1", "\u9127", "\u9b31", "\u55ae", "\u676d", 870 "\u6d2a", "\u5305", "\u8af8", "\u5de6", "\u77f3", "\u5d14", "\u5409", "\u9215", "\u9f94", "\u7a0b", 871 "\u5d47", "\u90a2", "\u6ed1", "\u88f4", "\u9678", "\u69ae", "\u7fc1", "\u8340", "\u7f8a", "\u65bc", 872 "\u60e0", "\u7504", "\u9eb4", "\u5bb6", "\u5c01", "\u82ae", "\u7fbf", "\u5132", "\u9773", "\u6c72", 873 "\u90b4", "\u7cdc", "\u677e", "\u4e95", "\u6bb5", "\u5bcc", "\u5deb", "\u70cf", "\u7126", "\u5df4", 874 "\u5f13", "\u7267", "\u9697", "\u5c71", "\u8c37", "\u8eca", "\u4faf", "\u5b93", "\u84ec", "\u5168", 875 "\u90d7", "\u73ed", "\u4ef0", "\u79cb", "\u4ef2", "\u4f0a", "\u5bae", "\u5be7", "\u4ec7", "\u6b12", 876 "\u66b4", "\u7518", "\u659c", "\u53b2", "\u620e", "\u7956", "\u6b66", "\u7b26", "\u5289", "\u666f", 877 "\u8a79", "\u675f", "\u9f8d", "\u8449", "\u5e78", "\u53f8", "\u97f6", "\u90dc", "\u9ece", "\u858a", 878 "\u8584", "\u5370", "\u5bbf", "\u767d", "\u61f7", "\u84b2", "\u90b0", "\u5f9e", "\u9102", "\u7d22", 879 "\u54b8", "\u7c4d", "\u8cf4", "\u5353", "\u85fa", "\u5c60", "\u8499", "\u6c60", "\u55ac", "\u9670", 880 "\u9b31", "\u80e5", "\u80fd", "\u84bc", "\u96d9", "\u805e", "\u8398", "\u9ee8", "\u7fdf", "\u8b5a", 881 "\u8ca2", "\u52de", "\u9004", "\u59ec", "\u7533", "\u6276", "\u5835", "\u5189", "\u5bb0", "\u9148", 882 "\u96cd", "\u90e4", "\u74a9", "\u6851", "\u6842", "\u6fee", "\u725b", "\u58fd", "\u901a", "\u908a", 883 "\u6248", "\u71d5", "\u5180", "\u90df", "\u6d66", "\u5c1a", "\u8fb2", "\u6eab", "\u5225", "\u838a", 884 "\u664f", "\u67f4", "\u77bf", "\u95bb", "\u5145", "\u6155", "\u9023", "\u8339", "\u7fd2", "\u5ba6", 885 "\u827e", "\u9b5a", "\u5bb9", "\u5411", "\u53e4", "\u6613", "\u614e", "\u6208", "\u5ed6", "\u5ebe", 886 "\u7d42", "\u66a8", "\u5c45", "\u8861", "\u6b65", "\u90fd", "\u803f", "\u6eff", "\u5f18", "\u5321", 887 "\u570b", "\u6587", "\u5bc7", "\u5ee3", "\u797f", "\u95d5", "\u6771", "\u6b50", "\u6bb3", "\u6c83", 888 "\u5229", "\u851a", "\u8d8a", "\u5914", "\u9686", "\u5e2b", "\u978f", "\u5399", "\u8076", "\u6641", 889 "\u52fe", "\u6556", "\u878d", "\u51b7", "\u8a3e", "\u8f9b", "\u95de", "\u90a3", "\u7c21", "\u9952", 890 "\u7a7a", "\u66fe", "\u6bcb", "\u6c99", "\u4e5c", "\u990a", "\u97a0", "\u9808", "\u8c50", "\u5de2", 891 "\u95dc", "\u84af", "\u76f8", "\u67e5", "\u5f8c", "\u834a", "\u7d05", "\u904a", "\u7afa", "\u6b0a", 892 "\u9011", "\u84cb", "\u76ca", "\u6853", "\u516c", "\u4e07\u4fdf", "\u53f8\u99ac", "\u4e0a\u5b98", 893 "\u6b50\u967d", "\u590f\u4faf", "\u8af8\u845b", "\u805e\u4eba", "\u6771\u65b9", "\u8d6b\u9023", 894 "\u7687\u752b", "\u5c09\u9072", "\u516c\u7f8a", "\u6fb9\u53f0", "\u516c\u51b6", "\u5b97\u653f", 895 "\u6fee\u967d", "\u6df3\u4e8e", "\u55ae\u4e8e", "\u592a\u53d4", "\u7533\u5c60", "\u516c\u5b6b", 896 "\u4ef2\u5b6b", "\u8ed2\u8f45", "\u4ee4\u72d0", "\u937e\u96e2", "\u5b87\u6587", "\u9577\u5b6b", 897 "\u6155\u5bb9", "\u9bae\u4e8e", "\u95ad\u4e18", "\u53f8\u5f92", "\u53f8\u7a7a", "\u4e0c\u5b98", 898 "\u53f8\u5bc7", "\u4ec9", "\u7763", "\u5b50\u8eca", "\u9853\u5b6b", "\u7aef\u6728", "\u5deb\u99ac", 899 "\u516c\u897f", "\u6f06\u96d5", "\u6a02\u6b63", "\u58e4\u99df", "\u516c\u826f", "\u62d3\u62d4", 900 "\u593e\u8c37", "\u5bb0\u7236", "\u7a40\u6881", "\u6649", "\u695a", "\u95bb", "\u6cd5", "\u6c5d", "\u9122", 901 "\u5857", "\u6b3d", "\u6bb5\u5e72", "\u767e\u91cc", "\u6771\u90ed", "\u5357\u9580", "\u547c\u5ef6", 902 "\u6b78", "\u6d77", "\u7f8a\u820c", "\u5fae\u751f", "\u5cb3", "\u5e25", "\u7df1", "\u4ea2", "\u6cc1", 903 "\u5f8c", "\u6709", "\u7434", "\u6881\u4e18", "\u5de6\u4e18", "\u6771\u9580", "\u897f\u9580", "\u5546", 904 "\u725f", "\u4f58", "\u4f74", "\u4f2f", "\u8cde", "\u5357\u5bae", "\u58a8", "\u54c8", "\u8b59", "\u7b2a", 905 "\u5e74", "\u611b", "\u967d", "\u4f5f", "\u3401", "\u3422", "\u3426", "\u3493", "\u34A5", "\u34A7", 906 "\u34AA", "\u3536", "\u4A3B", "\u4E00", "\u4E01", "\u4E07", "\u4E0D", "\u4E17", "\u4E23", "\u4E26", 907 "\u4E34", "\u4E82", "\u4EB8", "\u4EB9", "\u511F", "\u512D", "\u513D", "\u513E", "\u53B5", "\u56D4", 908 "\u56D6", "\u7065", "\u7069", "\u706A", "\u7E9E", "\u9750", "\u9F49", "\u9F7E", "\u9F98", "\uD840\uDC35", 909 "\uD840\uDC3D", "\uD840\uDC3E", "\uD840\uDC41", "\uD840\uDC46", "\uD840\uDC4C", "\uD840\uDC4E", 910 "\uD840\uDC53", "\uD840\uDC55", "\uD840\uDC56", "\uD840\uDC5F", "\uD840\uDC60", "\uD840\uDC7A", 911 "\uD840\uDC7B", "\uD840\uDCC8", "\uD840\uDD9E", "\uD840\uDD9F", "\uD840\uDDA0", "\uD840\uDDA1", 912 "\uD841\uDD3B", "\uD842\uDCCA", "\uD842\uDCCB", "\uD842\uDD6C", "\uD842\uDE0B", "\uD842\uDE0C", 913 "\uD842\uDED1", "\uD844\uDD9F", "\uD845\uDD19", "\uD845\uDD1A", "\uD846\uDD3B", "\uD84C\uDF5C", 914 "\uD85A\uDDC4", "\uD85A\uDDC5", "\uD85C\uDD98", "\uD85E\uDCB1", "\uD861\uDC04", "\uD864\uDDD3", 915 "\uD865\uDE63", "\uD869\uDCCA", "\uD86B\uDE9A", }; 916 917 /** 918 * Test AlphabeticIndex vs. root with script reordering. 919 */ 920 @Test 921 public void TestHaniFirst() { 922 RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); 923 coll.setReorderCodes(UScript.HAN); 924 AlphabeticIndex index = new AlphabeticIndex(coll); 925 assertEquals("getBucketCount()", 1, index.getBucketCount()); // ... (underflow only) 926 index.addLabels(Locale.ENGLISH); 927 assertEquals("getBucketCount()", 28, index.getBucketCount()); // ... A-Z ... 928 int bucketIndex = index.getBucketIndex("\u897f"); 929 assertEquals("getBucketIndex(U+897F)", 0, bucketIndex); // underflow bucket 930 bucketIndex = index.getBucketIndex("i"); 931 assertEquals("getBucketIndex(i)", 9, bucketIndex); 932 bucketIndex = index.getBucketIndex("\u03B1"); 933 assertEquals("getBucketIndex(Greek alpha)", 27, bucketIndex); 934 // U+50005 is an unassigned code point which sorts at the end, independent of the Hani group. 935 bucketIndex = index.getBucketIndex(UTF16.valueOf(0x50005)); 936 assertEquals("getBucketIndex(U+50005)", 27, bucketIndex); 937 bucketIndex = index.getBucketIndex("\uFFFF"); 938 assertEquals("getBucketIndex(U+FFFF)", 27, bucketIndex); 939 } 940 941 /** 942 * Test AlphabeticIndex vs. Pinyin with script reordering. 943 */ 944 @Test 945 public void TestPinyinFirst() { 946 RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(ULocale.CHINESE); 947 coll.setReorderCodes(UScript.HAN); 948 AlphabeticIndex index = new AlphabeticIndex(coll); 949 assertEquals("getBucketCount()", 28, index.getBucketCount()); // ... A-Z ... 950 index.addLabels(Locale.CHINESE); 951 assertEquals("getBucketCount()", 28, index.getBucketCount()); // ... A-Z ... 952 int bucketIndex = index.getBucketIndex("\u897f"); 953 assertEquals("getBucketIndex(U+897F)", 'X' - 'A' + 1, bucketIndex); 954 bucketIndex = index.getBucketIndex("i"); 955 assertEquals("getBucketIndex(i)", 9, bucketIndex); 956 bucketIndex = index.getBucketIndex("\u03B1"); 957 assertEquals("getBucketIndex(Greek alpha)", 27, bucketIndex); 958 // U+50005 is an unassigned code point which sorts at the end, independent of the Hani group. 959 bucketIndex = index.getBucketIndex(UTF16.valueOf(0x50005)); 960 assertEquals("getBucketIndex(U+50005)", 27, bucketIndex); 961 bucketIndex = index.getBucketIndex("\uFFFF"); 962 assertEquals("getBucketIndex(U+FFFF)", 27, bucketIndex); 963 } 964 965 /** 966 * Test labels with multiple primary weights. 967 */ 968 @Test 969 public void TestSchSt() { 970 AlphabeticIndex index = new AlphabeticIndex(ULocale.GERMAN); 971 index.addLabels(new UnicodeSet("[Æ{Sch*}{St*}]")); 972 // ... A Æ B-R S Sch St T-Z ... 973 ImmutableIndex immIndex = index.buildImmutableIndex(); 974 assertEquals("getBucketCount()", 31, index.getBucketCount()); 975 assertEquals("immutable getBucketCount()", 31, immIndex.getBucketCount()); 976 String[][] testCases = new String[][] { 977 // name, bucket index, bucket label 978 { "Adelbert", "1", "A" }, 979 { "Afrika", "1", "A" }, 980 { "Æsculap", "2", "Æ" }, 981 { "Aesthet", "2", "Æ" }, 982 { "Berlin", "3", "B" }, 983 { "Rilke", "19", "R" }, 984 { "Sacher", "20", "S" }, 985 { "Seiler", "20", "S" }, 986 { "Sultan", "20", "S" }, 987 { "Schiller", "21", "Sch" }, 988 { "Steiff", "22", "St" }, 989 { "Thomas", "23", "T" } 990 }; 991 List<String> labels = index.getBucketLabels(); 992 for (String[] testCase : testCases) { 993 String name = testCase[0]; 994 int bucketIndex = Integer.valueOf(testCase[1]); 995 String label = testCase[2]; 996 String msg = "getBucketIndex(" + name + ")"; 997 assertEquals(msg, bucketIndex, index.getBucketIndex(name)); 998 msg = "immutable " + msg; 999 assertEquals(msg, bucketIndex, immIndex.getBucketIndex(name)); 1000 msg = "bucket label (" + name + ")"; 1001 assertEquals(msg, label, labels.get(index.getBucketIndex(name))); 1002 msg = "immutable " + msg; 1003 assertEquals(msg, label, immIndex.getBucket(bucketIndex).getLabel()); 1004 } 1005 } 1006 1007 /** 1008 * With no real labels, there should be only the underflow label. 1009 */ 1010 @Test 1011 public void TestNoLabels() { 1012 RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); 1013 AlphabeticIndex<Integer> index = new AlphabeticIndex<Integer>(coll); 1014 index.addRecord("\u897f", 0); 1015 index.addRecord("i", 0); 1016 index.addRecord("\u03B1", 0); 1017 assertEquals("getRecordCount()", 3, index.getRecordCount()); // code coverage 1018 assertEquals("getBucketCount()", 1, index.getBucketCount()); // ... 1019 Bucket<Integer> bucket = index.iterator().next(); 1020 assertEquals("underflow label type", LabelType.UNDERFLOW, bucket.getLabelType()); 1021 assertEquals("all records in the underflow bucket", 3, bucket.size()); 1022 } 1023 1024 /** 1025 * Test with the Bopomofo-phonetic tailoring. 1026 */ 1027 @Test 1028 public void TestChineseZhuyin() { 1029 AlphabeticIndex index = new AlphabeticIndex(ULocale.forLanguageTag("zh-u-co-zhuyin")); 1030 ImmutableIndex immIndex = index.buildImmutableIndex(); 1031 assertEquals("getBucketCount()", 38, immIndex.getBucketCount()); // ... ㄅ ㄆ ㄇ ㄈ ㄉ -- ㄩ ... 1032 assertEquals("label 1", "ㄅ", immIndex.getBucket(1).getLabel()); 1033 assertEquals("label 2", "ㄆ", immIndex.getBucket(2).getLabel()); 1034 assertEquals("label 3", "ㄇ", immIndex.getBucket(3).getLabel()); 1035 assertEquals("label 4", "ㄈ", immIndex.getBucket(4).getLabel()); 1036 assertEquals("label 5", "ㄉ", immIndex.getBucket(5).getLabel()); 1037 } 1038 1039 @Test 1040 public void TestJapaneseKanji() { 1041 AlphabeticIndex index = new AlphabeticIndex(ULocale.JAPANESE); 1042 AlphabeticIndex.ImmutableIndex immIndex = index.buildImmutableIndex(); 1043 // There are no index characters for Kanji in the Japanese standard collator. 1044 // They should all go into the overflow bucket. 1045 final int[] kanji = { 0x4E9C, 0x95C7, 0x4E00, 0x58F1 }; 1046 int overflowIndex = immIndex.getBucketCount() - 1; 1047 for(int i = 0; i < kanji.length; ++i) { 1048 String msg = String.format("kanji[%d]=U+%04X in overflow bucket", i, kanji[i]); 1049 assertEquals(msg, overflowIndex, immIndex.getBucketIndex(UTF16.valueOf(kanji[i]))); 1050 } 1051 } 1052 1053 @Test 1054 public void TestFrozenCollator() { 1055 // Ticket #9472 1056 RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(new ULocale("da")); 1057 coll.setStrength(Collator.IDENTICAL); 1058 coll.freeze(); 1059 // The AlphabeticIndex constructor used to throw an exception 1060 // because it cloned the collator (which preserves frozenness) 1061 // and set the clone's strength to PRIMARY. 1062 AlphabeticIndex index = new AlphabeticIndex(coll); 1063 assertEquals("same strength as input Collator", 1064 Collator.IDENTICAL, index.getCollator().getStrength()); 1065 } 1066 1067 @Test 1068 public void TestChineseUnihan() { 1069 AlphabeticIndex index = new AlphabeticIndex(new ULocale("zh-u-co-unihan")); 1070 index.setMaxLabelCount(500); // ICU 54 default is 99. 1071 assertEquals("getMaxLabelCount()", 500, index.getMaxLabelCount()); // code coverage 1072 AlphabeticIndex.ImmutableIndex immIndex = index.buildImmutableIndex(); 1073 int bucketCount = immIndex.getBucketCount(); 1074 if(bucketCount < 216) { 1075 // There should be at least an underflow and overflow label, 1076 // and one for each of 214 radicals, 1077 // and maybe additional labels for simplified radicals. 1078 // (ICU4C: dataerrln(), prints only a warning if the data is missing) 1079 errln("too few buckets/labels for Chinese/unihan: " + bucketCount + 1080 " (is zh/unihan data available?)"); 1081 return; 1082 } else { 1083 logln("Chinese/unihan has " + bucketCount + " buckets/labels"); 1084 } 1085 // bucketIndex = radical number, adjusted for simplified radicals in lower buckets. 1086 int bucketIndex = index.getBucketIndex("\u4e5d"); 1087 assertEquals("getBucketIndex(U+4E5D)", 5, bucketIndex); 1088 // radical 100, and there is a 90' since Unicode 8 1089 bucketIndex = index.getBucketIndex("\u7527"); 1090 assertEquals("getBucketIndex(U+7527)", 101, bucketIndex); 1091 } 1092 1093 @Test 1094 public void testAddLabels_Locale() { 1095 AlphabeticIndex<?> ulocaleIndex = new AlphabeticIndex<String>(ULocale.CANADA); 1096 AlphabeticIndex<?> localeIndex = new AlphabeticIndex<String>(Locale.CANADA); 1097 ulocaleIndex.addLabels(ULocale.SIMPLIFIED_CHINESE); 1098 localeIndex.addLabels(Locale.SIMPLIFIED_CHINESE); 1099 assertEquals("getBucketLables() results of ulocaleIndex and localeIndex differ", 1100 ulocaleIndex.getBucketLabels(), localeIndex.getBucketLabels()); 1101 } 1102 1103 @Test 1104 public void testGetRecordCount_empty() { 1105 assertEquals("Record count of empty index not 0", 0, 1106 new AlphabeticIndex<String>(ULocale.CANADA).getRecordCount()); 1107 } 1108 1109 @Test 1110 public void testGetRecordCount_withRecords() { 1111 assertEquals("Record count of index with one record not 1", 1, 1112 new AlphabeticIndex<String>(ULocale.CANADA).addRecord("foo", null).getRecordCount()); 1113 } 1114 1115 /** 1116 * Check that setUnderflowLabel/setOverflowLabel/setInflowLabel correctly influence the name of 1117 * generated labels. 1118 */ 1119 @Test 1120 public void testFlowLabels() { 1121 AlphabeticIndex<?> index = new AlphabeticIndex<String>(ULocale.ENGLISH) 1122 .addLabels(ULocale.forLanguageTag("ru")); 1123 index.setUnderflowLabel("underflow"); 1124 index.setOverflowLabel("overflow"); 1125 index.setInflowLabel("inflow"); 1126 index.addRecord("!", null); 1127 index.addRecord("\u03B1", null); // GREEK SMALL LETTER ALPHA 1128 index.addRecord("\uab70", null); // CHEROKEE SMALL LETTER A 1129 AlphabeticIndex.Bucket<?> underflowBucket = null; 1130 AlphabeticIndex.Bucket<?> overflowBucket = null; 1131 AlphabeticIndex.Bucket<?> inflowBucket = null; 1132 for (AlphabeticIndex.Bucket<?> bucket : index) { 1133 switch (bucket.getLabelType()) { 1134 case UNDERFLOW: 1135 assertNull("LabelType not null", underflowBucket); 1136 underflowBucket = bucket; 1137 break; 1138 case OVERFLOW: 1139 assertNull("LabelType not null", overflowBucket); 1140 overflowBucket = bucket; 1141 break; 1142 case INFLOW: 1143 assertNull("LabelType not null", inflowBucket); 1144 inflowBucket = bucket; 1145 break; 1146 } 1147 } 1148 assertNotNull("No bucket 'underflow'", underflowBucket); 1149 assertEquals("Wrong bucket label", "underflow", underflowBucket.getLabel()); 1150 assertEquals("Wrong bucket label", "underflow", index.getUnderflowLabel()); 1151 assertEquals("Bucket size not 1", 1, underflowBucket.size()); 1152 assertNotNull("No bucket 'overflow'", overflowBucket); 1153 assertEquals("Wrong bucket label", "overflow", overflowBucket.getLabel()); 1154 assertEquals("Wrong bucket label", "overflow", index.getOverflowLabel()); 1155 assertEquals("Bucket size not 1", 1, overflowBucket.size()); 1156 assertNotNull("No bucket 'inflow'", inflowBucket); 1157 assertEquals("Wrong bucket label", "inflow", inflowBucket.getLabel()); 1158 assertEquals("Wrong bucket label", "inflow", index.getInflowLabel()); 1159 assertEquals("Bucket size not 1", 1, inflowBucket.size()); 1160 } 1161} 1162