17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/* 27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Copyright (C) 2008-2014, International Business Machines Corporation and 47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved. 57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.dev.test.collator; 87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.ArrayList; 97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Arrays; 107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Collection; 117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Iterator; 127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.LinkedHashSet; 137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.List; 147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Locale; 157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Set; 167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.TreeSet; 177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.dev.test.TestFmwk; 197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.dev.util.CollectionUtilities; 207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.ICUDebug; 217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Row; 227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Row.R4; 237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UCharacter; 247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UProperty; 257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UScript; 267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.AlphabeticIndex; 277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.AlphabeticIndex.Bucket; 287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.AlphabeticIndex.Bucket.LabelType; 297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.AlphabeticIndex.ImmutableIndex; 307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.AlphabeticIndex.Record; 317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.Collator; 327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.Normalizer2; 337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.RawCollationKey; 347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.RuleBasedCollator; 357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UTF16; 367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UnicodeSet; 377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.ULocale; 387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/** 407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @author Mark Davis 417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic class AlphabeticIndexTest extends TestFmwk { 437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final String ARROW = "\u2192"; 477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final boolean DEBUG = ICUDebug.enabled("alphabeticindex"); 487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static Set<String> KEY_LOCALES = new LinkedHashSet(Arrays.asList( 507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "en", "es", "de", "fr", "ja", "it", "tr", "pt", "zh", "nl", 517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "pl", "ar", "ru", "zh_Hant", "ko", "th", "sv", "fi", "da", 527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "he", "nb", "el", "hr", "bg", "sk", "lt", "vi", "lv", "sr", 537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "pt_PT", "ro", "hu", "cs", "id", "sl", "fil", "fa", "uk", 547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "ca", "hi", "et", "eu", "is", "sw", "ms", "bn", "am", "ta", 557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "te", "mr", "ur", "ml", "kn", "gu", "or")); 567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private String[][] localeAndIndexCharactersLists = new String[][] { 577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Arabic*/ {"ar", "\u0627:\u0628:\u062A:\u062B:\u062C:\u062D:\u062E:\u062F:\u0630:\u0631:\u0632:\u0633:\u0634:\u0635:\u0636:\u0637:\u0638:\u0639:\u063A:\u0641:\u0642:\u0643:\u0644:\u0645:\u0646:\u0647:\u0648:\u064A"}, 587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Bulgarian*/ {"bg", "\u0410:\u0411:\u0412:\u0413:\u0414:\u0415:\u0416:\u0417:\u0418:\u0419:\u041A:\u041B:\u041C:\u041D:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u0428:\u0429:\u042E:\u042F"}, 597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Catalan*/ {"ca", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Czech*/ {"cs", "A:B:C:\u010C:D:E:F:G:H:CH:I:J:K:L:M:N:O:P:Q:R:\u0158:S:\u0160:T:U:V:W:X:Y:Z:\u017D"}, 617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Danish*/ {"da", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C6:\u00D8:\u00C5"}, 627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* German*/ {"de", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Greek*/ {"el", "\u0391:\u0392:\u0393:\u0394:\u0395:\u0396:\u0397:\u0398:\u0399:\u039A:\u039B:\u039C:\u039D:\u039E:\u039F:\u03A0:\u03A1:\u03A3:\u03A4:\u03A5:\u03A6:\u03A7:\u03A8:\u03A9"}, 647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* English*/ {"en", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Spanish*/ {"es", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:\u00D1:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Estonian*/ {"et", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:\u0160:Z:\u017D:T:U:V:\u00D5:\u00C4:\u00D6:\u00DC:X:Y"}, 677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Basque*/ {"eu", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Finnish*/ {"fi", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C5:\u00C4:\u00D6"}, 697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Filipino*/ {"fil", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* French*/ {"fr", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Hebrew*/ {"he", "\u05D0:\u05D1:\u05D2:\u05D3:\u05D4:\u05D5:\u05D6:\u05D7:\u05D8:\u05D9:\u05DB:\u05DC:\u05DE:\u05E0:\u05E1:\u05E2:\u05E4:\u05E6:\u05E7:\u05E8:\u05E9:\u05EA"}, 727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Icelandic*/ {"is", "A:\u00C1:B:C:D:\u00D0:E:\u00C9:F:G:H:I:\u00CD:J:K:L:M:N:O:\u00D3:P:Q:R:S:T:U:\u00DA:V:W:X:Y:\u00DD:Z:\u00DE:\u00C6:\u00D6"}, 737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Italian*/ {"it", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Japanese*/ {"ja", "\u3042:\u304B:\u3055:\u305F:\u306A:\u306F:\u307E:\u3084:\u3089:\u308F"}, 757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Korean*/ {"ko", "\u3131:\u3134:\u3137:\u3139:\u3141:\u3142:\u3145:\u3147:\u3148:\u314A:\u314B:\u314C:\u314D:\u314E"}, 767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Lithuanian*/ {"lt", "A:B:C:\u010C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:\u0160:T:U:V:Z:\u017D"}, 777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Latvian*/ {"lv", "A:B:C:\u010C:D:E:F:G:\u0122:H:I:J:K:\u0136:L:\u013B:M:N:\u0145:O:P:Q:R:S:\u0160:T:U:V:W:X:Z:\u017D"}, 787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Norwegian Bokm\u00E5l*/ {"nb", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C6:\u00D8:\u00C5"}, 797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Dutch*/ {"nl", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Polish*/ {"pl", "A:\u0104:B:C:\u0106:D:E:\u0118:F:G:H:I:J:K:L:\u0141:M:N:\u0143:O:\u00D3:P:Q:R:S:\u015A:T:U:V:W:X:Y:Z:\u0179:\u017B"}, 817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Portuguese*/ {"pt", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Romanian*/ {"ro", "A:\u0102:\u00C2:B:C:D:E:F:G:H:I:\u00CE:J:K:L:M:N:O:P:Q:R:S:\u0218:T:\u021A:U:V:W:X:Y:Z"}, 837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Russian*/ {"ru", "\u0410:\u0411:\u0412:\u0413:\u0414:\u0415:\u0416:\u0417:\u0418:\u0419:\u041A:\u041B:\u041C:\u041D:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u0428:\u0429:\u042B:\u042D:\u042E:\u042F"}, 847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Slovak*/ {"sk", "A:\u00C4:B:C:\u010C:D:E:F:G:H:CH:I:J:K:L:M:N:O:\u00D4:P:Q:R:S:\u0160:T:U:V:W:X:Y:Z:\u017D"}, 857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Slovenian*/ {"sl", "A:B:C:\u010C:\u0106:D:\u0110:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:\u0160:T:U:V:W:X:Y:Z:\u017D"}, 867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Serbian*/ {"sr", "\u0410:\u0411:\u0412:\u0413:\u0414:\u0402:\u0415:\u0416:\u0417:\u0418:\u0408:\u041A:\u041B:\u0409:\u041C:\u041D:\u040A:\u041E:\u041F:\u0420:\u0421:\u0422:\u040B:\u0423:\u0424:\u0425:\u0426:\u0427:\u040F:\u0428"}, 877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Swedish*/ {"sv", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C5:\u00C4:\u00D6"}, 887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Turkish*/ {"tr", "A:B:C:\u00C7:D:E:F:G:H:I:\u0130:J:K:L:M:N:O:\u00D6:P:Q:R:S:\u015E:T:U:\u00DC:V:W:X:Y:Z"}, 897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Ukrainian*/ {"uk", "\u0410:\u0411:\u0412:\u0413:\u0490:\u0414:\u0415:\u0404:\u0416:\u0417:\u0418:\u0406:\u0407:\u0419:\u041A:\u041B:\u041C:\u041D:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u0428:\u0429:\u042E:\u042F"}, 907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Vietnamese*/ {"vi", "A:\u0102:\u00C2:B:C:D:\u0110:E:\u00CA:F:G:H:I:J:K:L:M:N:O:\u00D4:\u01A0:P:Q:R:S:T:U:\u01AF:V:W:X:Y:Z"}, 917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Chinese*/ {"zh", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* Chinese (Traditional Han)*/ {"zh_Hant", "1\u5283:2\u5283:3\u5283:4\u5283:5\u5283:6\u5283:7\u5283:8\u5283:9\u5283:10\u5283:11\u5283:12\u5283:13\u5283:14\u5283:15\u5283:16\u5283:17\u5283:18\u5283:19\u5283:20\u5283:21\u5283:22\u5283:23\u5283:24\u5283:25\u5283:26\u5283:27\u5283:28\u5283:29\u5283:30\u5283:31\u5283:32\u5283:33\u5283:35\u5283:36\u5283:39\u5283:48\u5283"}, 937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Comment these out to make the test run faster. Later, make these run under extended 957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Afrikaans*/ {"af", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Akan*/ {"ak", "A:B:C:D:E:\u0190:F:G:H:I:J:K:L:M:N:O:\u0186:P:Q:R:S:T:U:V:W:X:Y:Z"}, 987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Asu*/ {"asa", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"}, 997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Azerbaijani*/ {"az", "A:B:C:\u00C7:D:E:\u018F:F:G:\u011E:H:X:I:\u0130:J:K:Q:L:M:N:O:\u00D6:P:R:S:\u015E:T:U:\u00DC:V:W:Y:Z"}, 1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Belarusian*/ {"be", "\u0410:\u0411:\u0412:\u0413:\u0414:\u0415:\u0416:\u0417:\u0406:\u0419:\u041A:\u041B:\u041C:\u041D:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u0428:\u042B:\u042D:\u042E:\u042F"}, 1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Bemba*/ {"bem", "A:B:C:E:F:G:I:J:K:L:M:N:O:P:S:T:U:W:Y"}, 1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Bena*/ {"bez", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:Y:Z"}, 1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Bambara*/ {"bm", "A:B:C:D:E:\u0190:F:G:H:I:J:K:L:M:N:\u019D:\u014A:O:\u0186:P:R:S:T:U:W:Y:Z"}, 1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Tibetan*/ {"bo", "\u0F40:\u0F41:\u0F42:\u0F44:\u0F45:\u0F46:\u0F47:\u0F49:\u0F4F:\u0F50:\u0F51:\u0F53:\u0F54:\u0F55:\u0F56:\u0F58:\u0F59:\u0F5A:\u0F5B:\u0F5D:\u0F5E:\u0F5F:\u0F60:\u0F61:\u0F62:\u0F63:\u0F64:\u0F66:\u0F67:\u0F68"}, 1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Chiga*/ {"cgg", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Cherokee*/ {"chr", "\u13A0:\u13A6:\u13AD:\u13B3:\u13B9:\u13BE:\u13C6:\u13CC:\u13D3:\u13DC:\u13E3:\u13E9:\u13EF"}, 1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Welsh*/ {"cy", "A:B:C:CH:D:E:F:FF:G:H:I:J:L:LL:M:N:O:P:PH:R:RH:S:T:TH:U:W:Y"}, 1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Taita*/ {"dav", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"}, 1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Embu*/ {"ebu", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Ewe*/ {"ee", "A:B:C:D:\u0189:E:\u0190:F:\u0191:G:\u0194:H:I:J:K:L:M:N:\u014A:O:\u0186:P:Q:R:S:T:U:V:\u01B2:W:X:Y:Z"}, 1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Esperanto*/ {"eo", "A:B:C:\u0108:D:E:F:G:\u011C:H:\u0124:I:J:\u0134:K:L:M:N:O:P:R:S:\u015C:T:U:\u016C:V:Z"}, 1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Fulah*/ {"ff", "A:B:\u0181:C:D:\u018A:E:F:G:H:I:J:K:L:M:N:\u014A:O:P:R:S:T:U:W:Y:\u01B3"}, 1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Faroese*/ {"fo", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C6:\u00D8"}, 1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Gusii*/ {"guz", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"}, 1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Hausa*/ {"ha", "A:B:\u0181:C:D:\u018A:E:F:G:H:I:J:K:\u0198:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Igbo*/ {"ig", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Machame*/ {"jmc", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"}, 1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Kabyle*/ {"kab", "A:B:C:D:E:\u0190:F:G:\u0194:H:I:J:K:L:M:N:P:Q:R:S:T:U:W:X:Y:Z"}, 1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Kamba*/ {"kam", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Makonde*/ {"kde", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Kabuverdianu*/ {"kea", "A:B:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:X:Z"}, 1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Koyra Chiini*/ {"khq", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:\u019D:\u014A:O:P:Q:R:S:T:U:W:X:Y:Z"}, 1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Kikuyu*/ {"ki", "A:B:C:D:E:G:H:I:J:K:M:N:O:R:T:U:W:Y"}, 1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Kalenjin*/ {"kln", "A:B:C:D:E:G:H:I:J:K:L:M:N:O:P:R:S:T:U:W:Y"}, 1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Langi*/ {"lag", "A:B:C:D:E:F:G:H:I:\u0197:J:K:L:M:N:O:P:Q:R:S:T:U:\u0244:V:W:X:Y:Z"}, 1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Ganda*/ {"lg", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Luo*/ {"luo", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y"}, 1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Luyia*/ {"luy", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Masai*/ {"mas", "A:B:C:D:E:\u0190:G:H:I:\u0197:J:K:L:M:N:\u014A:O:\u0186:P:R:S:T:U:\u0244:W:Y"}, 1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Meru*/ {"mer", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Morisyen*/ {"mfe", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:X:Y:Z"}, 1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Malagasy*/ {"mg", "A:B:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:V:Y:Z"}, 1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // This should be the correct data. Commented till it is fixed in CLDR collation data. 1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // {"mk", "\u0410:\u0411:\u0412:\u0413:\u0403:\u0414:\u0415:\u0416:\u0417:\u0405:\u0418:\u0408:\u041A:\u040C:\u041B:\u0409:\u041C:\u041D:\u040A:\u041E:\u041F:\u0420:\u0421:\u0422:\u0423:\u0424:\u0425:\u0426:\u0427:\u040F:\u0428"}, 1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Macedonian*/ {"mk", "\u0410:\u0411:\u0412:\u0413:\u0414:\u0403:\u0415:\u0416:\u0417:\u0405:\u0418:\u0408:\u041A:\u041B:\u0409:\u041C:\u041D:\u040A:\u041E:\u041F:\u0420:\u0421:\u0422:\u040C:\u0423:\u0424:\u0425:\u0426:\u0427:\u040F:\u0428"}, 1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // This should be the correct data. Commented till it is fixed in CLDR collation data. 1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // {"mt", "A:B:C:\u010A:D:E:F:\u0120:G:G\u0126:H:\u0126:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:\u017B:Z"}, 1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Maltese*/ {"mt", "A:B:\u010A:C:D:E:F:\u0120:G:G\u0126:H:\u0126:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:\u017B:Z"}, 1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Nama*/ {"naq", "A:B:C:D:E:F:G:H:I:K:M:N:O:P:Q:R:S:T:U:W:X:Y:Z"}, 1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* North Ndebele*/ {"nd", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:S:T:U:V:W:X:Y:Z"}, 1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Norwegian Nynorsk*/ {"nn", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u00C6:\u00D8:\u00C5"}, 1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Nyankole*/ {"nyn", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Oromo*/ {"om", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Romansh*/ {"rm", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Rombo*/ {"rof", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"}, 1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Kinyarwanda*/ {"rw", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Rwa*/ {"rwk", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"}, 1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Samburu*/ {"saq", "A:B:C:D:E:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y"}, 1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Sena*/ {"seh", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Koyraboro Senni*/ {"ses", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:\u019D:\u014A:O:P:Q:R:S:T:U:W:X:Y:Z"}, 1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Sango*/ {"sg", "A:B:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"}, 1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Tachelhit*/ {"shi", "A:B:C:D:E:\u0190:F:G:\u0194:H:I:J:K:L:M:N:Q:R:S:T:U:W:X:Y:Z"}, 1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Tachelhit (Tifinagh)*/ {"shi_Tfng", "\u2D30:\u2D31:\u2D33:\u2D37:\u2D39:\u2D3B:\u2D3C:\u2D3D:\u2D40:\u2D43:\u2D44:\u2D45:\u2D47:\u2D49:\u2D4A:\u2D4D:\u2D4E:\u2D4F:\u2D53:\u2D54:\u2D55:\u2D56:\u2D59:\u2D5A:\u2D5B:\u2D5C:\u2D5F:\u2D61:\u2D62:\u2D63:\u2D65"}, 1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Shona*/ {"sn", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"}, 1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Teso*/ {"teo", "A:B:C:D:E:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:X:Y"}, 1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Tonga*/ {"to", "A:B:C:D:E:F:G:H:\u02BB:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Central Morocco Tamazight*/ {"tzm", "A:B:C:D:E:\u0190:F:G:\u0194:H:I:J:K:L:M:N:Q:R:S:T:U:W:X:Y:Z"}, 1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Uzbek (Latin)*/ {"uz_Latn", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\u02BF"}, 1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Vunjo*/ {"vun", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:R:S:T:U:V:W:Y:Z"}, 1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Soga*/ {"xog", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // /* Yoruba*/ {"yo", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, 1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert }; 1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static void main(String[] args) throws Exception{ 1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert new AlphabeticIndexTest().run(args); 1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// public void TestAAKeyword() { 1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// ICUResourceBundle rb = (ICUResourceBundle) UResourceBundle.getBundleInstance( 1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// ICUResourceBundle.ICU_COLLATION_BASE_NAME, "zh"); 1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// showBundle(rb, 0); 1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// String[] keywords = Collator.getKeywords(); 1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// System.out.println(Arrays.asList(keywords)); 1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// String locale = "zh"; 1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// ULocale ulocale = new ULocale(locale); 1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// for (String keyword : keywords) { 1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// List<String> values = Arrays.asList(Collator.getKeywordValuesForLocale(keyword, ulocale, false)); 1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// List<String> allValues = Arrays.asList(Collator.getKeywordValues(keyword)); 1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// for (String value : allValues) { 1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// System.out.println(keyword + "=" + value); 1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// checkKeyword(locale, value, values.contains(value)); 1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// } 1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// } 1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// } 1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// 1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// private void checkKeyword(String locale, String collationValue, boolean shouldExist) { 1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// final ULocale base = new ULocale(locale); 1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// final ULocale desired = new ULocale(locale + "@collation=" + collationValue); 1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// Collator foo = Collator.getInstance(desired); 1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// ULocale actual = foo.getLocale(ULocale.ACTUAL_LOCALE); 1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// if (shouldExist) { 1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// assertEquals("actual should match desired", desired, actual); 1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// } else { 1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// assertEquals("actual should match base", base, actual); 1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// } 1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// int comp = foo.compare("a", "ā"); 1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// assertEquals("should fall back to default for zh", -1, comp); 1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// } 1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// 2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// /** 2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// * @param rb 2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// * @param i 2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// */ 2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// private static void showBundle(UResourceBundle rb, int i) { 2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// for (String key : rb.keySet()) { 2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// System.out.print("\n" + Utility.repeat(" ", i) + key); 2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// UResourceBundle rb2 = rb.get(key); 2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// showBundle(rb2, i+1); 2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// } 2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// } 2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void TestA() { 2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String[][] tests = {{"zh_Hant", "渡辺", "12劃"}, 2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert {"zh", "渡辺", "D"} 2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /*, "zh@collation=unihan", "ja@collation=unihan", "ko@collation=unihan"*/ 2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert }; 2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (String[] test : tests) { 2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert AlphabeticIndex<Integer> alphabeticIndex = new AlphabeticIndex<Integer>(new ULocale(test[0])); 2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert final String probe = test[1]; 2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert final String expectedLabel = test[2]; 2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert alphabeticIndex.addRecord(probe, 1); 2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert List labels = alphabeticIndex.getBucketLabels(); 2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert logln(labels.toString()); 2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Bucket<Integer> bucket = find(alphabeticIndex, probe); 2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("locale " + test[0] + " name=" + probe + " in bucket", 2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert expectedLabel, bucket.getLabel()); 2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private Bucket<Integer> find(AlphabeticIndex<Integer> alphabeticIndex, final String probe) { 2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (Bucket<Integer> bucket : alphabeticIndex) { 2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (Record<Integer> record : bucket) { 2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (record.getName().equals(probe)) { 2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return bucket; 2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return null; 2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void TestFirstCharacters() { 2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert AlphabeticIndex alphabeticIndex = new AlphabeticIndex(Locale.ENGLISH); 2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert RuleBasedCollator collator = alphabeticIndex.getCollator(); 2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert collator.setStrength(Collator.IDENTICAL); 2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Collection<String> firsts = alphabeticIndex.getFirstCharactersInScripts(); 2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Verify that each script is represented exactly once. 2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnicodeSet missingScripts = new UnicodeSet("[^[:sc=inherited:][:sc=unknown:][:sc=common:][:Script=Braille:]]"); 2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String last = ""; 2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (String index : firsts) { 2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (collator.compare(last,index) >= 0) { 2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert errln("Characters not in order: " + last + " !< " + index); 2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int script = getFirstRealScript(index); 2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (script == UScript.UNKNOWN) { continue; } 2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnicodeSet s = new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, script); 2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (missingScripts.containsNone(s)) { 2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert errln("2nd character in script: " + index + "\t" + new UnicodeSet(missingScripts).retainAll(s).toPattern(false)); 2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert missingScripts.removeAll(s); 2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (missingScripts.size() != 0) { 2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String missingScriptNames = ""; 2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnicodeSet missingChars = new UnicodeSet(missingScripts); 2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(;;) { 2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int c = missingChars.charAt(0); 2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (c < 0) { 2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int script = UScript.getScript(c); 2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert missingScriptNames += " " + 2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UCharacter.getPropertyValueName( 2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UProperty.SCRIPT, script, UProperty.NameChoice.SHORT); 2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert missingChars.removeAll(new UnicodeSet().applyIntPropertyValue(UProperty.SCRIPT, script)); 2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert errln("Missing character from:" + missingScriptNames + " -- " + missingScripts); 2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final int getFirstRealScript(CharSequence s) { 2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = 0; i < s.length();) { 2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int c = Character.codePointAt(s, i); 2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int script = UScript.getScript(c); 2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (script != UScript.UNKNOWN && script != UScript.INHERITED && script != UScript.COMMON) { 2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return script; 2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert i += Character.charCount(c); 2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return UScript.UNKNOWN; 2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void TestBuckets() { 2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ULocale additionalLocale = ULocale.ENGLISH; 2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (String[] pair : localeAndIndexCharactersLists) { 2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert checkBuckets(pair[0], SimpleTests, additionalLocale, "E", "edgar", "Effron", "Effron"); 2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void TestEmpty() { 3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // just verify that it doesn't blow up. 3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Set<ULocale> locales = new LinkedHashSet<ULocale>(); 3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert locales.add(ULocale.ROOT); 3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert locales.addAll(Arrays.asList(ULocale.getAvailableLocales())); 3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (ULocale locale : locales) { 3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert AlphabeticIndex<String> alphabeticIndex = new AlphabeticIndex(locale); 3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert alphabeticIndex.addRecord("hi", "HI"); 3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (Bucket<String> bucket : alphabeticIndex) { 3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @SuppressWarnings("unused") 3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert LabelType labelType = bucket.getLabelType(); 3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch (Exception e) { 3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert errln("Exception when creating AlphabeticIndex for:\t" + locale.toLanguageTag()); 3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert errln(e.toString()); 3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void TestInflow() { 3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Object[][] tests = { 3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert {0, ULocale.ENGLISH}, 3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert {0, ULocale.ENGLISH, new ULocale("el")}, 3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert {1, ULocale.ENGLISH, new ULocale("ru")}, 3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert {0, ULocale.ENGLISH, new ULocale("el"), new UnicodeSet("[\u2C80]"), new ULocale("ru")}, 3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert {0, ULocale.ENGLISH}, 3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert {2, ULocale.ENGLISH, new ULocale("ru"), ULocale.JAPANESE}, 3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert }; 3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (Object[] test : tests) { 3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int expected = (Integer) test[0]; 3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert AlphabeticIndex<Double> alphabeticIndex = new AlphabeticIndex((ULocale)test[1]); 3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = 2; i < test.length; ++i) { 3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (test[i] instanceof ULocale) { 3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert alphabeticIndex.addLabels((ULocale)test[i]); 3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert alphabeticIndex.addLabels((UnicodeSet)test[i]); 3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Counter<AlphabeticIndex.Bucket.LabelType> counter = new Counter(); 3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (Bucket<Double> bucket : alphabeticIndex) { 3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert LabelType labelType = bucket.getLabelType(); 3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert counter.add(labelType, 1); 3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String printList = Arrays.asList(test).toString(); 3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals(LabelType.UNDERFLOW + "\t" + printList, 1, counter.get(LabelType.UNDERFLOW)); 3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals(LabelType.INFLOW + "\t" + printList, expected, counter.get(LabelType.INFLOW)); 3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (expected != counter.get(LabelType.INFLOW)) { 3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // for debugging 3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert AlphabeticIndex<Double> indexCharacters2 = new AlphabeticIndex((ULocale)test[1]); 3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = 2; i < test.length; ++i) { 3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (test[i] instanceof ULocale) { 3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert indexCharacters2.addLabels((ULocale)test[i]); 3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert indexCharacters2.addLabels((UnicodeSet)test[i]); 3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert List<Bucket<Double>> buckets = CollectionUtilities.addAll(alphabeticIndex.iterator(), new ArrayList<Bucket<Double>>()); 3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert logln(buckets.toString()); 3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals(LabelType.OVERFLOW + "\t" + printList, 1, counter.get(LabelType.OVERFLOW)); 3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private void checkBuckets(String localeString, String[] test, ULocale additionalLocale, String testBucket, String... items) { 3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuilder UI = new StringBuilder(); 3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ULocale desiredLocale = new ULocale(localeString); 3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Create a simple index where the values for the strings are Integers, and add the strings 3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert AlphabeticIndex<Integer> index = new AlphabeticIndex<Integer>(desiredLocale).addLabels(additionalLocale); 3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int counter = 0; 3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Counter<String> itemCount = new Counter(); 3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (String item : test) { 3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index.addRecord(item, counter++); 3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert itemCount.add(item, 1); 3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert List<String> labels = index.getBucketLabels(); 3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ImmutableIndex<Integer> immIndex = index.buildImmutableIndex(); 3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert logln(desiredLocale + "\t" + desiredLocale.getDisplayName(ULocale.ENGLISH) + " - " + desiredLocale.getDisplayName(desiredLocale) + "\t" 3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert + index.getCollator().getLocale(ULocale.ACTUAL_LOCALE)); 3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UI.setLength(0); 3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UI.append(desiredLocale + "\t"); 3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean showAll = true; 3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Show index at top. We could skip or gray out empty buckets 3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (AlphabeticIndex.Bucket<Integer> bucket : index) { 3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (showAll || bucket.size() != 0) { 3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert showLabelAtTop(UI, bucket.getLabel()); 3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert logln(UI.toString()); 3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Show the buckets with their contents, skipping empty buckets 3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int bucketIndex = 0; 3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (Bucket<Integer> bucket : index) { 3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("bucket label vs. iterator", 3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert labels.get(bucketIndex), bucket.getLabel()); 4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("bucket label vs. immutable", 4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert labels.get(bucketIndex), immIndex.getBucket(bucketIndex).getLabel()); 4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("bucket label type vs. immutable", 4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bucket.getLabelType(), immIndex.getBucket(bucketIndex).getLabelType()); 4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (Record<Integer> r : bucket) { 4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CharSequence name = r.getName(); 4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("getBucketIndex(" + name + ")", 4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bucketIndex, index.getBucketIndex(name)); 4087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("immutable getBucketIndex(" + name + ")", 4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bucketIndex, immIndex.getBucketIndex(name)); 4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (bucket.getLabel().equals(testBucket)) { 4127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Counter<String> keys = getKeys(bucket); 4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (String item : items) { 4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert long globalCount = itemCount.get(item); 4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert long localeCount = keys.get(item); 4167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (globalCount != localeCount) { 4177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert errln("Error: in " + "'" + testBucket + "', '" + item + "' should have count " 4187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert + globalCount + " but has count " + localeCount); 4197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (bucket.size() != 0) { 4257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert showLabelInList(UI, bucket.getLabel()); 4267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (AlphabeticIndex.Record<Integer> item : bucket) { 4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert showIndexedItem(UI, item.getName(), item.getData()); 4287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert logln(UI.toString()); 4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++bucketIndex; 4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("getBucketCount()", bucketIndex, index.getBucketCount()); 4347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("immutable getBucketCount()", bucketIndex, immIndex.getBucketCount()); 4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertNull("immutable getBucket(-1)", immIndex.getBucket(-1)); 4377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertNull("immutable getBucket(count)", immIndex.getBucket(bucketIndex)); 4387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (Bucket<Integer> bucket : immIndex) { 4407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("immutable bucket size", 0, bucket.size()); 4417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertFalse("immutable bucket iterator.hasNext()", bucket.iterator().hasNext()); 4427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public <T> void showIndex(AlphabeticIndex<T> index, boolean showEmpty) { 4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert logln("Actual"); 4477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuilder UI = new StringBuilder(); 4487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (Bucket<T> bucket : index) { 4497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (showEmpty || bucket.size() != 0) { 4507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert showLabelInList(UI, bucket.getLabel()); 4517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (Record<T> item : bucket) { 4527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert showIndexedItem(UI, item.getName(), item.getData()); 4537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert logln(UI.toString()); 4557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param myBucketLabels 4617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param myBucketContents 4627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param b 4637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private void showIndex(List<String> myBucketLabels, ArrayList<Set<R4<RawCollationKey, String, Integer, Double>>> myBucketContents, boolean showEmpty) { 4657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert logln("Alternative"); 4667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuilder UI = new StringBuilder(); 4677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = 0; i < myBucketLabels.size(); ++i) { 4697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Set<R4<RawCollationKey, String, Integer, Double>> bucket = myBucketContents.get(i); 4707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (!showEmpty && bucket.size() == 0) { 4717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert continue; 4727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UI.setLength(0); 4747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UI.append("*").append(myBucketLabels.get(i)); 4757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (R4<RawCollationKey, String, Integer, Double> item : bucket) { 4767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UI.append("\t ").append(item.get1().toString()).append(ARROW).append(item.get3().toString()); 4777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert logln(UI.toString()); 4797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private void showLabelAtTop(StringBuilder buffer, String label) { 4837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert buffer.append(label + " "); 4847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private <T> void showIndexedItem(StringBuilder buffer, CharSequence key, T value) { 4877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert buffer.append("\t " + key + ARROW + value); 4887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private void showLabelInList(StringBuilder buffer, String label) { 4917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert buffer.setLength(0); 4927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert buffer.append(label); 4937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private Counter<String> getKeys(AlphabeticIndex.Bucket<Integer> entry) { 4967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Counter<String> keys = new Counter<String>(); 4977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (AlphabeticIndex.Record x : entry) { 4987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String key = x.getName().toString(); 4997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert keys.add(key, 1); 5007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return keys; 5027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void TestIndexCharactersList() { 5057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (String[] localeAndIndexCharacters : localeAndIndexCharactersLists) { 5067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ULocale locale = new ULocale(localeAndIndexCharacters[0]); 5077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String expectedIndexCharacters = "\u2026:" + localeAndIndexCharacters[1] + ":\u2026"; 5087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Collection<String> alphabeticIndex = new AlphabeticIndex(locale).getBucketLabels(); 5097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Join the elements of the list to a string with delimiter ":" 5117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuilder sb = new StringBuilder(); 5127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Iterator<String> iter = alphabeticIndex.iterator(); 5137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (iter.hasNext()) { 5147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert sb.append(iter.next()); 5157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (!iter.hasNext()) { 5167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 5177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert sb.append(":"); 5197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String actualIndexCharacters = sb.toString(); 5217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (!expectedIndexCharacters.equals(actualIndexCharacters)) { 5227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert errln("Test failed for locale " + localeAndIndexCharacters[0] + 5237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\n Expected = |" + expectedIndexCharacters + "|\n actual = |" + actualIndexCharacters + "|"); 5247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void TestBasics() { 5297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ULocale[] list = ULocale.getAvailableLocales(); 5307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // get keywords combinations 5317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // don't bother with multiple combinations at this point 5327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert List keywords = new ArrayList(); 5337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert keywords.add(""); 5347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String[] collationValues = Collator.getKeywordValues("collation"); 5367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int j = 0; j < collationValues.length; ++j) { 5377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert keywords.add("@collation=" + collationValues[j]); 5387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = 0; i < list.length; ++i) { 5417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (Iterator it = keywords.iterator(); it.hasNext();) { 5427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String collationValue = (String) it.next(); 5437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String localeString = list[i].toString(); 5447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (!KEY_LOCALES.contains(localeString)) continue; // TODO change in exhaustive 5457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ULocale locale = new ULocale(localeString + collationValue); 5467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (collationValue.length() > 0 && !Collator.getFunctionalEquivalent("collation", locale).equals(locale)) { 5477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert //logln("Skipping " + locale); 5487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert continue; 5497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (locale.getCountry().length() != 0) { 5527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert continue; 5537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean isUnihan = collationValue.contains("unihan"); 5557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert AlphabeticIndex alphabeticIndex = new AlphabeticIndex(locale); 5567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (isUnihan) { 5577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Unihan tailorings have a label per radical, and there are at least 214, 5587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // if not more when simplified radicals are distinguished. 5597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert alphabeticIndex.setMaxLabelCount(500); 5607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert final Collection mainChars = alphabeticIndex.getBucketLabels(); 5627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String mainCharString = mainChars.toString(); 5637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (mainCharString.length() > 500) { 5647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert mainCharString = mainCharString.substring(0,500) + "..."; 5657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert logln(mainChars.size() + "\t" + locale + "\t" + locale.getDisplayName(ULocale.ENGLISH)); 5677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert logln("Index:\t" + mainCharString); 5687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (!isUnihan && mainChars.size() > 100) { 5697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert errln("Index character set too large: " + 5707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert locale + " [" + mainChars.size() + "]:\n " + mainChars); 5717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void TestClientSupport() { 5777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (String localeString : new String[] {"zh"}) { // KEY_LOCALES, new String[] {"zh"} 5787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ULocale ulocale = new ULocale(localeString); 5797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert AlphabeticIndex<Double> alphabeticIndex = new AlphabeticIndex<Double>(ulocale).addLabels(ULocale.ENGLISH); 5807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert RuleBasedCollator collator = alphabeticIndex.getCollator(); 5817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String [][] tests; 5827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (!localeString.equals("zh") ) { 5847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert tests = new String[][] {SimpleTests}; 5857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 5867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert tests = new String[][] {SimpleTests, hackPinyin, simplifiedNames}; 5877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (String [] shortTest : tests) { 5907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert double testValue = 100; 5917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert alphabeticIndex.clearRecords(); 5927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (String name : shortTest) { 5937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert alphabeticIndex.addRecord(name, testValue++); 5947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (DEBUG) showIndex(alphabeticIndex, false); 5977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // make my own copy 5997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert testValue = 100; 6007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert List<String> myBucketLabels = alphabeticIndex.getBucketLabels(); 6017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ArrayList<Set<R4<RawCollationKey, String, Integer, Double>>> myBucketContents = new ArrayList<Set<R4<RawCollationKey, String, Integer, Double>>>(myBucketLabels.size()); 6027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = 0; i < myBucketLabels.size(); ++i) { 6037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert myBucketContents.add(new TreeSet<R4<RawCollationKey, String, Integer, Double>>()); 6047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (String name : shortTest) { 6067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int bucketIndex = alphabeticIndex.getBucketIndex(name); 6077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (bucketIndex > myBucketContents.size()) { 6087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert alphabeticIndex.getBucketIndex(name); // call again for debugging 6097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Set<R4<RawCollationKey, String, Integer, Double>> myBucket = myBucketContents.get(bucketIndex); 6117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert RawCollationKey rawCollationKey = collator.getRawCollationKey(name, null); 6127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert R4<RawCollationKey, String, Integer, Double> row = Row.of(rawCollationKey, name, name.length(), testValue++); 6137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert myBucket.add(row); 6147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (DEBUG) showIndex(myBucketLabels, myBucketContents, false); 6167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // now compare 6187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int index = 0; 6197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean gotError = false; 6207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (AlphabeticIndex.Bucket<Double> bucket : alphabeticIndex) { 6217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String bucketLabel = bucket.getLabel(); 6227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String myLabel = myBucketLabels.get(index); 6237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (!bucketLabel.equals(myLabel)) { 6247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert gotError |= !assertEquals(ulocale + "\tBucket Labels (" + index + ")", bucketLabel, myLabel); 6257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Set<R4<RawCollationKey, String, Integer, Double>> myBucket = myBucketContents.get(index); 6277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Iterator<R4<RawCollationKey, String, Integer, Double>> myBucketIterator = myBucket.iterator(); 6287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int recordIndex = 0; 6297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (Record<Double> record : bucket) { 6307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String myName = null; 6317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (myBucketIterator.hasNext()) { 6327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert R4<RawCollationKey, String, Integer, Double> myRecord = myBucketIterator.next(); 6337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert myName = (String) myRecord.get1(); 6347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (!record.getName().equals(myName)) { 6367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert gotError |= !assertEquals(ulocale + "\t" + bucketLabel + "\t" + "Record Names (" + index + "." + recordIndex++ + ")", record.getName(), myName); 6377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (myBucketIterator.hasNext()) { 6407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert R4<RawCollationKey, String, Integer, Double> myRecord = myBucketIterator.next(); 6417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String myName = (String) myRecord.get1(); 6427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert gotError |= !assertEquals(ulocale + "\t" + bucketLabel + "\t" + "Record Names (" + index + "." + recordIndex++ + ")", null, myName); 6437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index++; 6457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (gotError) { 6477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert showIndex(myBucketLabels, myBucketContents, false); 6487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert showIndex(alphabeticIndex, false); 6497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void TestFirstScriptCharacters() { 6557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Collection<String> firstCharacters = 6567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert new AlphabeticIndex(ULocale.ENGLISH).getFirstCharactersInScripts(); 6577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Collection<String> expectedFirstCharacters = firstStringsInScript((RuleBasedCollator) Collator.getInstance(ULocale.ROOT)); 6587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Collection<String> diff = new TreeSet<String>(firstCharacters); 6597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert diff.removeAll(expectedFirstCharacters); 6607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertTrue("First Characters contains unexpected ones: " + diff, diff.isEmpty()); 6617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert diff.clear(); 6627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert diff.addAll(expectedFirstCharacters); 6637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert diff.removeAll(firstCharacters); 6647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertTrue("First Characters missing expected ones: " + diff, diff.isEmpty()); 6657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final UnicodeSet TO_TRY = new UnicodeSet("[[:^nfcqc=no:]-[:sc=Common:]-[:sc=Inherited:]-[:sc=Unknown:]]").freeze(); 6687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 6707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns a collection of all the "First" characters of scripts, according to the collation. 6717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 6727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static Collection<String> firstStringsInScript(RuleBasedCollator ruleBasedCollator) { 6737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String[] results = new String[UScript.CODE_LIMIT]; 6747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (String current : TO_TRY) { 6757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (ruleBasedCollator.compare(current, "a") < 0) { // we only want "real" script characters, not symbols. 6767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert continue; 6777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int script = UScript.getScript(current.codePointAt(0)); 6797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (results[script] == null) { 6807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert results[script] = current; 6817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if (ruleBasedCollator.compare(current, results[script]) < 0) { 6827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert results[script] = current; 6837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 6877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnicodeSet extras = new UnicodeSet(); 6887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnicodeSet expansions = new UnicodeSet(); 6897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ruleBasedCollator.getContractionsAndExpansions(extras, expansions, true); 6907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert extras.addAll(expansions).removeAll(TO_TRY); 6917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (extras.size() != 0) { 6927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Normalizer2 normalizer = Normalizer2.getNFKCInstance(); 6937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (String current : extras) { 6947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (!normalizer.isNormalized(current) || ruleBasedCollator.compare(current, "9") <= 0) { 6957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert continue; 6967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int script = getFirstRealScript(current); 6987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (script == UScript.UNKNOWN && !isUnassignedBoundary(current)) { continue; } 6997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (results[script] == null) { 7007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert results[script] = current; 7017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if (ruleBasedCollator.compare(current, results[script]) < 0) { 7027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert results[script] = current; 7037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch (Exception e) { 7077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } // why have a checked exception??? 7087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 7097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // TODO: We should not test that we get the same strings, but that we 7107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // get strings that sort primary-equal to those from the implementation. 7117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 7127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Collection<String> result = new ArrayList<String>(); 7137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = 0; i < results.length; ++i) { 7147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (results[i] != null) { 7157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.add(results[i]); 7167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result; 7197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 7217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final boolean isUnassignedBoundary(CharSequence s) { 7227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // The root collator provides a script-first-primary boundary contraction 7237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // for the unassigned-implicit range. 7247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return s.charAt(0) == 0xfdd1 && 7257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UScript.getScript(Character.codePointAt(s, 1)) == UScript.UNKNOWN; 7267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 7287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void TestZZZ() { 7297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // int x = 3; 7307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // AlphabeticIndex index = new AlphabeticIndex(ULocale.ENGLISH); 7317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // UnicodeSet additions = new UnicodeSet(); 7327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // additions.add(0x410).add(0x415); // Cyrillic 7337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // // additions.add(0x391).add(0x393); // Greek 7347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // index.addLabels(additions); 7357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // int lc = index.getLabels().size(); 7367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // List labels = index.getLabels(); 7377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // System.out.println("Label Count = " + lc + "\t" + labels); 7387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // System.out.println("Bucket Count =" + index.getBucketCount()); 7397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 7417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void TestSimplified() { 7427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert checkBuckets("zh", simplifiedNames, ULocale.ENGLISH, "W", "\u897f"); 7437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void TestTraditional() { 7457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert checkBuckets("zh_Hant", traditionalNames, ULocale.ENGLISH, "\u4e9f", "\u5357\u9580"); 7467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 7487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static final String[] SimpleTests = { 7497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "斎藤", 7507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u1f2d\u03c1\u03b1", 7517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "$", "\u00a3", "12", "2", 7527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "Davis", "Davis", "Abbot", "\u1D05avis", "Zach", "\u1D05avis", "\u01b5", "\u0130stanbul", "Istanbul", "istanbul", "\u0131stanbul", 7537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u00deor", "\u00c5berg", "\u00d6stlund", 7547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u1f2d\u03c1\u03b1", "\u1f08\u03b8\u03b7\u03bd\u1fb6", 7557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u0396\u03b5\u03cd\u03c2", "\u03a0\u03bf\u03c3\u03b5\u03b9\u03b4\u1f63\u03bd", "\u1f0d\u03b9\u03b4\u03b7\u03c2", "\u0394\u03b7\u03bc\u03ae\u03c4\u03b7\u03c1", "\u1f19\u03c3\u03c4\u03b9\u03ac", 7567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert //"\u1f08\u03c0\u03cc\u03bb\u03bb\u03c9\u03bd", "\u1f0c\u03c1\u03c4\u03b5\u03bc\u03b9\u03c2", "\u1f19\u03c1\u03bc\u1f23\u03c2", "\u1f0c\u03c1\u03b7\u03c2", "\u1f08\u03c6\u03c1\u03bf\u03b4\u03af\u03c4\u03b7", "\u1f2d\u03c6\u03b1\u03b9\u03c3\u03c4\u03bf\u03c2", "\u0394\u03b9\u03cc\u03bd\u03c5\u03c3\u03bf\u03c2", 7577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u6589\u85e4", "\u4f50\u85e4", "\u9234\u6728", "\u9ad8\u6a4b", "\u7530\u4e2d", "\u6e21\u8fba", "\u4f0a\u85e4", "\u5c71\u672c", "\u4e2d\u6751", "\u5c0f\u6797", "\u658e\u85e4", "\u52a0\u85e4", 7587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert //"\u5409\u7530", "\u5c71\u7530", "\u4f50\u3005\u6728", "\u5c71\u53e3", "\u677e\u672c", "\u4e95\u4e0a", "\u6728\u6751", "\u6797", "\u6e05\u6c34" 7597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert }; 7607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 7617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static final String[] hackPinyin = { 7627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "a", "\u5416", "\u58ba", // 7637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "b", "\u516b", "\u62d4", "\u8500", // 7647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "c", "\u5693", "\u7938", "\u9e7e", // 7657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "d", "\u5491", "\u8fcf", "\u964a", // 7667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "e","\u59b8", "\u92e8", "\u834b", // 7677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "f", "\u53d1", "\u9197", "\u99a5", // 7687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "g", "\u7324", "\u91d3", "\u8142", // 7697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "h", "\u598e", "\u927f", "\u593b", // 7707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "j", "\u4e0c", "\u6785", "\u9d58", // 7717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "k", "\u5494", "\u958b", "\u7a52", // 7727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "l", "\u5783", "\u62c9", "\u9ba5", // 7737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "m", "\u5638", "\u9ebb", "\u65c0", // 7747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "n", "\u62ff", "\u80ad", "\u685b", // 7757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "o", "\u5662", "\u6bee", "\u8bb4", // 7767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "p", "\u5991", "\u8019", "\u8c31", // 7777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "q", "\u4e03", "\u6053", "\u7f56", // 7787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "r", "\u5465", "\u72aa", "\u6e03", // 7797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "s", "\u4ee8", "\u9491", "\u93c1", // 7807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "t", "\u4ed6", "\u9248", "\u67dd", // 7817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "w", "\u5c72", "\u5558", "\u5a7a", // 7827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "x", "\u5915", "\u5438", "\u6bbe", // 7837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "y", "\u4e2b", "\u82bd", "\u8574", // 7847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "z", "\u5e00", "\u707d", "\u5c0a" 7857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert }; 7867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 7877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static final String[] simplifiedNames = { 7887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "Abbot", "Morton", "Zachary", "Williams", "\u8d75", "\u94b1", "\u5b59", "\u674e", "\u5468", "\u5434", "\u90d1", "\u738b", "\u51af", "\u9648", "\u696e", "\u536b", "\u848b", "\u6c88", 7897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u97e9", "\u6768", "\u6731", "\u79e6", "\u5c24", "\u8bb8", "\u4f55", "\u5415", "\u65bd", "\u5f20", "\u5b54", "\u66f9", "\u4e25", "\u534e", "\u91d1", "\u9b4f", "\u9676", "\u59dc", "\u621a", "\u8c22", "\u90b9", 7907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u55bb", "\u67cf", "\u6c34", "\u7aa6", "\u7ae0", "\u4e91", "\u82cf", "\u6f58", "\u845b", "\u595a", "\u8303", "\u5f6d", "\u90ce", "\u9c81", "\u97e6", "\u660c", "\u9a6c", "\u82d7", "\u51e4", "\u82b1", "\u65b9", 7917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u4fde", "\u4efb", "\u8881", "\u67f3", "\u9146", "\u9c8d", "\u53f2", "\u5510", "\u8d39", "\u5ec9", "\u5c91", "\u859b", "\u96f7", "\u8d3a", "\u502a", "\u6c64", "\u6ed5", "\u6bb7", "\u7f57", "\u6bd5", "\u90dd", 7927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u90ac", "\u5b89", "\u5e38", "\u4e50", "\u4e8e", "\u65f6", "\u5085", "\u76ae", "\u535e", "\u9f50", "\u5eb7", "\u4f0d", "\u4f59", "\u5143", "\u535c", "\u987e", "\u5b5f", "\u5e73", "\u9ec4", "\u548c", "\u7a46", 7937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u8427", "\u5c39", "\u59da", "\u90b5", "\u6e5b", "\u6c6a", "\u7941", "\u6bdb", "\u79b9", "\u72c4", "\u7c73", "\u8d1d", "\u660e", "\u81e7", "\u8ba1", "\u4f0f", "\u6210", "\u6234", "\u8c08", "\u5b8b", "\u8305", 7947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u5e9e", "\u718a", "\u7eaa", "\u8212", "\u5c48", "\u9879", "\u795d", "\u8463", "\u6881", "\u675c", "\u962e", "\u84dd", "\u95fd", "\u5e2d", "\u5b63", "\u9ebb", "\u5f3a", "\u8d3e", "\u8def", "\u5a04", "\u5371", 7957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u6c5f", "\u7ae5", "\u989c", "\u90ed", "\u6885", "\u76db", "\u6797", "\u5201", "\u953a", "\u5f90", "\u4e18", "\u9a86", "\u9ad8", "\u590f", "\u8521", "\u7530", "\u6a0a", "\u80e1", "\u51cc", "\u970d", "\u865e", 7967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u4e07", "\u652f", "\u67ef", "\u661d", "\u7ba1", "\u5362", "\u83ab", "\u7ecf", "\u623f", "\u88d8", "\u7f2a", "\u5e72", "\u89e3", "\u5e94", "\u5b97", "\u4e01", "\u5ba3", "\u8d32", "\u9093", "\u90c1", "\u5355", 7977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u676d", "\u6d2a", "\u5305", "\u8bf8", "\u5de6", "\u77f3", "\u5d14", "\u5409", "\u94ae", "\u9f9a", "\u7a0b", "\u5d47", "\u90a2", "\u6ed1", "\u88f4", "\u9646", "\u8363", "\u7fc1", "\u8340", "\u7f8a", "\u65bc", 7987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u60e0", "\u7504", "\u9eb9", "\u5bb6", "\u5c01", "\u82ae", "\u7fbf", "\u50a8", "\u9773", "\u6c72", "\u90b4", "\u7cdc", "\u677e", "\u4e95", "\u6bb5", "\u5bcc", "\u5deb", "\u4e4c", "\u7126", "\u5df4", "\u5f13", 7997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u7267", "\u9697", "\u5c71", "\u8c37", "\u8f66", "\u4faf", "\u5b93", "\u84ec", "\u5168", "\u90d7", "\u73ed", "\u4ef0", "\u79cb", "\u4ef2", "\u4f0a", "\u5bab", "\u5b81", "\u4ec7", "\u683e", "\u66b4", "\u7518", 8007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u659c", "\u5389", "\u620e", "\u7956", "\u6b66", "\u7b26", "\u5218", "\u666f", "\u8a79", "\u675f", "\u9f99", "\u53f6", "\u5e78", "\u53f8", "\u97f6", "\u90dc", "\u9ece", "\u84df", "\u8584", "\u5370", "\u5bbf", 8017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u767d", "\u6000", "\u84b2", "\u90b0", "\u4ece", "\u9102", "\u7d22", "\u54b8", "\u7c4d", "\u8d56", "\u5353", "\u853a", "\u5c60", "\u8499", "\u6c60", "\u4e54", "\u9634", "\u90c1", "\u80e5", "\u80fd", "\u82cd", 8027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u53cc", "\u95fb", "\u8398", "\u515a", "\u7fdf", "\u8c2d", "\u8d21", "\u52b3", "\u9004", "\u59ec", "\u7533", "\u6276", "\u5835", "\u5189", "\u5bb0", "\u90e6", "\u96cd", "\u90e4", "\u74a9", "\u6851", "\u6842", 8037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u6fee", "\u725b", "\u5bff", "\u901a", "\u8fb9", "\u6248", "\u71d5", "\u5180", "\u90cf", "\u6d66", "\u5c1a", "\u519c", "\u6e29", "\u522b", "\u5e84", "\u664f", "\u67f4", "\u77bf", "\u960e", "\u5145", "\u6155", 8047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u8fde", "\u8339", "\u4e60", "\u5ba6", "\u827e", "\u9c7c", "\u5bb9", "\u5411", "\u53e4", "\u6613", "\u614e", "\u6208", "\u5ed6", "\u5ebe", "\u7ec8", "\u66a8", "\u5c45", "\u8861", "\u6b65", "\u90fd", "\u803f", 8057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u6ee1", "\u5f18", "\u5321", "\u56fd", "\u6587", "\u5bc7", "\u5e7f", "\u7984", "\u9619", "\u4e1c", "\u6b27", "\u6bb3", "\u6c83", "\u5229", "\u851a", "\u8d8a", "\u5914", "\u9686", "\u5e08", "\u5de9", "\u538d", 8067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u8042", "\u6641", "\u52fe", "\u6556", "\u878d", "\u51b7", "\u8a3e", "\u8f9b", "\u961a", "\u90a3", "\u7b80", "\u9976", "\u7a7a", "\u66fe", "\u6bcb", "\u6c99", "\u4e5c", "\u517b", "\u97a0", "\u987b", "\u4e30", 8077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u5de2", "\u5173", "\u84af", "\u76f8", "\u67e5", "\u540e", "\u8346", "\u7ea2", "\u6e38", "\u7afa", "\u6743", "\u9011", "\u76d6", "\u76ca", "\u6853", "\u516c", "\u4e07\u4fdf", "\u53f8\u9a6c", "\u4e0a\u5b98", "\u6b27\u9633", 8087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u590f\u4faf", "\u8bf8\u845b", "\u95fb\u4eba", "\u4e1c\u65b9", "\u8d6b\u8fde", "\u7687\u752b", "\u5c09\u8fdf", "\u516c\u7f8a", "\u6fb9\u53f0", "\u516c\u51b6", "\u5b97\u653f", "\u6fee\u9633", "\u6df3\u4e8e", "\u5355\u4e8e", "\u592a\u53d4", "\u7533\u5c60", "\u516c\u5b59", "\u4ef2\u5b59", 8097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u8f69\u8f95", "\u4ee4\u72d0", "\u953a\u79bb", "\u5b87\u6587", "\u957f\u5b59", "\u6155\u5bb9", "\u9c9c\u4e8e", "\u95fe\u4e18", "\u53f8\u5f92", "\u53f8\u7a7a", "\u4e0c\u5b98", "\u53f8\u5bc7", "\u4ec9", "\u7763", "\u5b50\u8f66", "\u989b\u5b59", "\u7aef\u6728", "\u5deb\u9a6c", 8107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u516c\u897f", "\u6f06\u96d5", "\u4e50\u6b63", "\u58e4\u9a77", "\u516c\u826f", "\u62d3\u62d4", "\u5939\u8c37", "\u5bb0\u7236", "\u8c37\u6881", "\u664b", "\u695a", "\u960e", "\u6cd5", "\u6c5d", "\u9122", "\u6d82", "\u94a6", "\u6bb5\u5e72", "\u767e\u91cc", 8117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u4e1c\u90ed", "\u5357\u95e8", "\u547c\u5ef6", "\u5f52", "\u6d77", "\u7f8a\u820c", "\u5fae\u751f", "\u5cb3", "\u5e05", "\u7f11", "\u4ea2", "\u51b5", "\u540e", "\u6709", "\u7434", "\u6881\u4e18", "\u5de6\u4e18", "\u4e1c\u95e8", "\u897f\u95e8", 8127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u5546", "\u725f", "\u4f58", "\u4f74", "\u4f2f", "\u8d4f", "\u5357\u5bab", "\u58a8", "\u54c8", "\u8c2f", "\u7b2a", "\u5e74", "\u7231", "\u9633", "\u4f5f" 8137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert }; 8147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static final String[] traditionalNames = { "丁", "Abbot", "Morton", "Zachary", "Williams", "\u8d99", "\u9322", "\u5b6b", 8167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u674e", "\u5468", "\u5433", "\u912d", "\u738b", "\u99ae", "\u9673", "\u696e", "\u885b", "\u8523", 8177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u6c88", "\u97d3", "\u694a", "\u6731", "\u79e6", "\u5c24", "\u8a31", "\u4f55", "\u5442", "\u65bd", 8187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u5f35", "\u5b54", "\u66f9", "\u56b4", "\u83ef", "\u91d1", "\u9b4f", "\u9676", "\u59dc", "\u621a", 8197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u8b1d", "\u9112", "\u55bb", "\u67cf", "\u6c34", "\u7ac7", "\u7ae0", "\u96f2", "\u8607", "\u6f58", 8207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u845b", "\u595a", "\u7bc4", "\u5f6d", "\u90ce", "\u9b6f", "\u97cb", "\u660c", "\u99ac", "\u82d7", 8217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u9cf3", "\u82b1", "\u65b9", "\u4fde", "\u4efb", "\u8881", "\u67f3", "\u9146", "\u9b91", "\u53f2", 8227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u5510", "\u8cbb", "\u5ec9", "\u5c91", "\u859b", "\u96f7", "\u8cc0", "\u502a", "\u6e6f", "\u6ed5", 8237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u6bb7", "\u7f85", "\u7562", "\u90dd", "\u9114", "\u5b89", "\u5e38", "\u6a02", "\u65bc", "\u6642", 8247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u5085", "\u76ae", "\u535e", "\u9f4a", "\u5eb7", "\u4f0d", "\u9918", "\u5143", "\u535c", "\u9867", 8257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u5b5f", "\u5e73", "\u9ec3", "\u548c", "\u7a46", "\u856d", "\u5c39", "\u59da", "\u90b5", "\u6e5b", 8267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u6c6a", "\u7941", "\u6bdb", "\u79b9", "\u72c4", "\u7c73", "\u8c9d", "\u660e", "\u81e7", "\u8a08", 8277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u4f0f", "\u6210", "\u6234", "\u8ac7", "\u5b8b", "\u8305", "\u9f90", "\u718a", "\u7d00", "\u8212", 8287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u5c48", "\u9805", "\u795d", "\u8463", "\u6881", "\u675c", "\u962e", "\u85cd", "\u95a9", "\u5e2d", 8297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u5b63", "\u9ebb", "\u5f37", "\u8cc8", "\u8def", "\u5a41", "\u5371", "\u6c5f", "\u7ae5", "\u984f", 8307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u90ed", "\u6885", "\u76db", "\u6797", "\u5201", "\u937e", "\u5f90", "\u4e18", "\u99f1", "\u9ad8", 8317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u590f", "\u8521", "\u7530", "\u6a0a", "\u80e1", "\u51cc", "\u970d", "\u865e", "\u842c", "\u652f", 8327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u67ef", "\u661d", "\u7ba1", "\u76e7", "\u83ab", "\u7d93", "\u623f", "\u88d8", "\u7e46", "\u5e79", 8337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u89e3", "\u61c9", "\u5b97", "\u4e01", "\u5ba3", "\u8cc1", "\u9127", "\u9b31", "\u55ae", "\u676d", 8347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u6d2a", "\u5305", "\u8af8", "\u5de6", "\u77f3", "\u5d14", "\u5409", "\u9215", "\u9f94", "\u7a0b", 8357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u5d47", "\u90a2", "\u6ed1", "\u88f4", "\u9678", "\u69ae", "\u7fc1", "\u8340", "\u7f8a", "\u65bc", 8367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u60e0", "\u7504", "\u9eb4", "\u5bb6", "\u5c01", "\u82ae", "\u7fbf", "\u5132", "\u9773", "\u6c72", 8377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u90b4", "\u7cdc", "\u677e", "\u4e95", "\u6bb5", "\u5bcc", "\u5deb", "\u70cf", "\u7126", "\u5df4", 8387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u5f13", "\u7267", "\u9697", "\u5c71", "\u8c37", "\u8eca", "\u4faf", "\u5b93", "\u84ec", "\u5168", 8397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u90d7", "\u73ed", "\u4ef0", "\u79cb", "\u4ef2", "\u4f0a", "\u5bae", "\u5be7", "\u4ec7", "\u6b12", 8407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u66b4", "\u7518", "\u659c", "\u53b2", "\u620e", "\u7956", "\u6b66", "\u7b26", "\u5289", "\u666f", 8417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u8a79", "\u675f", "\u9f8d", "\u8449", "\u5e78", "\u53f8", "\u97f6", "\u90dc", "\u9ece", "\u858a", 8427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u8584", "\u5370", "\u5bbf", "\u767d", "\u61f7", "\u84b2", "\u90b0", "\u5f9e", "\u9102", "\u7d22", 8437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u54b8", "\u7c4d", "\u8cf4", "\u5353", "\u85fa", "\u5c60", "\u8499", "\u6c60", "\u55ac", "\u9670", 8447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u9b31", "\u80e5", "\u80fd", "\u84bc", "\u96d9", "\u805e", "\u8398", "\u9ee8", "\u7fdf", "\u8b5a", 8457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u8ca2", "\u52de", "\u9004", "\u59ec", "\u7533", "\u6276", "\u5835", "\u5189", "\u5bb0", "\u9148", 8467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u96cd", "\u90e4", "\u74a9", "\u6851", "\u6842", "\u6fee", "\u725b", "\u58fd", "\u901a", "\u908a", 8477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u6248", "\u71d5", "\u5180", "\u90df", "\u6d66", "\u5c1a", "\u8fb2", "\u6eab", "\u5225", "\u838a", 8487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u664f", "\u67f4", "\u77bf", "\u95bb", "\u5145", "\u6155", "\u9023", "\u8339", "\u7fd2", "\u5ba6", 8497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u827e", "\u9b5a", "\u5bb9", "\u5411", "\u53e4", "\u6613", "\u614e", "\u6208", "\u5ed6", "\u5ebe", 8507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u7d42", "\u66a8", "\u5c45", "\u8861", "\u6b65", "\u90fd", "\u803f", "\u6eff", "\u5f18", "\u5321", 8517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u570b", "\u6587", "\u5bc7", "\u5ee3", "\u797f", "\u95d5", "\u6771", "\u6b50", "\u6bb3", "\u6c83", 8527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u5229", "\u851a", "\u8d8a", "\u5914", "\u9686", "\u5e2b", "\u978f", "\u5399", "\u8076", "\u6641", 8537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u52fe", "\u6556", "\u878d", "\u51b7", "\u8a3e", "\u8f9b", "\u95de", "\u90a3", "\u7c21", "\u9952", 8547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u7a7a", "\u66fe", "\u6bcb", "\u6c99", "\u4e5c", "\u990a", "\u97a0", "\u9808", "\u8c50", "\u5de2", 8557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u95dc", "\u84af", "\u76f8", "\u67e5", "\u5f8c", "\u834a", "\u7d05", "\u904a", "\u7afa", "\u6b0a", 8567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u9011", "\u84cb", "\u76ca", "\u6853", "\u516c", "\u4e07\u4fdf", "\u53f8\u99ac", "\u4e0a\u5b98", 8577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u6b50\u967d", "\u590f\u4faf", "\u8af8\u845b", "\u805e\u4eba", "\u6771\u65b9", "\u8d6b\u9023", 8587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u7687\u752b", "\u5c09\u9072", "\u516c\u7f8a", "\u6fb9\u53f0", "\u516c\u51b6", "\u5b97\u653f", 8597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u6fee\u967d", "\u6df3\u4e8e", "\u55ae\u4e8e", "\u592a\u53d4", "\u7533\u5c60", "\u516c\u5b6b", 8607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u4ef2\u5b6b", "\u8ed2\u8f45", "\u4ee4\u72d0", "\u937e\u96e2", "\u5b87\u6587", "\u9577\u5b6b", 8617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u6155\u5bb9", "\u9bae\u4e8e", "\u95ad\u4e18", "\u53f8\u5f92", "\u53f8\u7a7a", "\u4e0c\u5b98", 8627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u53f8\u5bc7", "\u4ec9", "\u7763", "\u5b50\u8eca", "\u9853\u5b6b", "\u7aef\u6728", "\u5deb\u99ac", 8637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u516c\u897f", "\u6f06\u96d5", "\u6a02\u6b63", "\u58e4\u99df", "\u516c\u826f", "\u62d3\u62d4", 8647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u593e\u8c37", "\u5bb0\u7236", "\u7a40\u6881", "\u6649", "\u695a", "\u95bb", "\u6cd5", "\u6c5d", "\u9122", 8657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u5857", "\u6b3d", "\u6bb5\u5e72", "\u767e\u91cc", "\u6771\u90ed", "\u5357\u9580", "\u547c\u5ef6", 8667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u6b78", "\u6d77", "\u7f8a\u820c", "\u5fae\u751f", "\u5cb3", "\u5e25", "\u7df1", "\u4ea2", "\u6cc1", 8677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u5f8c", "\u6709", "\u7434", "\u6881\u4e18", "\u5de6\u4e18", "\u6771\u9580", "\u897f\u9580", "\u5546", 8687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u725f", "\u4f58", "\u4f74", "\u4f2f", "\u8cde", "\u5357\u5bae", "\u58a8", "\u54c8", "\u8b59", "\u7b2a", 8697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u5e74", "\u611b", "\u967d", "\u4f5f", "\u3401", "\u3422", "\u3426", "\u3493", "\u34A5", "\u34A7", 8707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u34AA", "\u3536", "\u4A3B", "\u4E00", "\u4E01", "\u4E07", "\u4E0D", "\u4E17", "\u4E23", "\u4E26", 8717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u4E34", "\u4E82", "\u4EB8", "\u4EB9", "\u511F", "\u512D", "\u513D", "\u513E", "\u53B5", "\u56D4", 8727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\u56D6", "\u7065", "\u7069", "\u706A", "\u7E9E", "\u9750", "\u9F49", "\u9F7E", "\u9F98", "\uD840\uDC35", 8737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\uD840\uDC3D", "\uD840\uDC3E", "\uD840\uDC41", "\uD840\uDC46", "\uD840\uDC4C", "\uD840\uDC4E", 8747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\uD840\uDC53", "\uD840\uDC55", "\uD840\uDC56", "\uD840\uDC5F", "\uD840\uDC60", "\uD840\uDC7A", 8757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\uD840\uDC7B", "\uD840\uDCC8", "\uD840\uDD9E", "\uD840\uDD9F", "\uD840\uDDA0", "\uD840\uDDA1", 8767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\uD841\uDD3B", "\uD842\uDCCA", "\uD842\uDCCB", "\uD842\uDD6C", "\uD842\uDE0B", "\uD842\uDE0C", 8777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\uD842\uDED1", "\uD844\uDD9F", "\uD845\uDD19", "\uD845\uDD1A", "\uD846\uDD3B", "\uD84C\uDF5C", 8787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\uD85A\uDDC4", "\uD85A\uDDC5", "\uD85C\uDD98", "\uD85E\uDCB1", "\uD861\uDC04", "\uD864\uDDD3", 8797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert "\uD865\uDE63", "\uD869\uDCCA", "\uD86B\uDE9A", }; 8807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 8827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Test AlphabeticIndex vs. root with script reordering. 8837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 8847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void TestHaniFirst() { 8857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); 8867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert coll.setReorderCodes(UScript.HAN); 8877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert AlphabeticIndex index = new AlphabeticIndex(coll); 8887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("getBucketCount()", 1, index.getBucketCount()); // ... (underflow only) 8897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index.addLabels(ULocale.ENGLISH); 8907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("getBucketCount()", 28, index.getBucketCount()); // ... A-Z ... 8917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int bucketIndex = index.getBucketIndex("\u897f"); 8927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("getBucketIndex(U+897F)", 0, bucketIndex); // underflow bucket 8937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bucketIndex = index.getBucketIndex("i"); 8947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("getBucketIndex(i)", 9, bucketIndex); 8957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bucketIndex = index.getBucketIndex("\u03B1"); 8967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("getBucketIndex(Greek alpha)", 27, bucketIndex); 8977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // U+50005 is an unassigned code point which sorts at the end, independent of the Hani group. 8987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bucketIndex = index.getBucketIndex(UTF16.valueOf(0x50005)); 8997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("getBucketIndex(U+50005)", 27, bucketIndex); 9007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bucketIndex = index.getBucketIndex("\uFFFF"); 9017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("getBucketIndex(U+FFFF)", 27, bucketIndex); 9027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 9057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Test AlphabeticIndex vs. Pinyin with script reordering. 9067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 9077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void TestPinyinFirst() { 9087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(ULocale.CHINESE); 9097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert coll.setReorderCodes(UScript.HAN); 9107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert AlphabeticIndex index = new AlphabeticIndex(coll); 9117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("getBucketCount()", 28, index.getBucketCount()); // ... A-Z ... 9127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index.addLabels(ULocale.CHINESE); 9137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("getBucketCount()", 28, index.getBucketCount()); // ... A-Z ... 9147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int bucketIndex = index.getBucketIndex("\u897f"); 9157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("getBucketIndex(U+897F)", 'X' - 'A' + 1, bucketIndex); 9167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bucketIndex = index.getBucketIndex("i"); 9177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("getBucketIndex(i)", 9, bucketIndex); 9187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bucketIndex = index.getBucketIndex("\u03B1"); 9197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("getBucketIndex(Greek alpha)", 27, bucketIndex); 9207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // U+50005 is an unassigned code point which sorts at the end, independent of the Hani group. 9217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bucketIndex = index.getBucketIndex(UTF16.valueOf(0x50005)); 9227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("getBucketIndex(U+50005)", 27, bucketIndex); 9237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bucketIndex = index.getBucketIndex("\uFFFF"); 9247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("getBucketIndex(U+FFFF)", 27, bucketIndex); 9257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 9287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Test labels with multiple primary weights. 9297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 9307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void TestSchSt() { 9317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert AlphabeticIndex index = new AlphabeticIndex(ULocale.GERMAN); 9327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index.addLabels(new UnicodeSet("[Æ{Sch*}{St*}]")); 9337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // ... A Æ B-R S Sch St T-Z ... 9347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ImmutableIndex immIndex = index.buildImmutableIndex(); 9357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("getBucketCount()", 31, index.getBucketCount()); 9367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("immutable getBucketCount()", 31, immIndex.getBucketCount()); 9377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String[][] testCases = new String[][] { 9387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // name, bucket index, bucket label 9397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { "Adelbert", "1", "A" }, 9407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { "Afrika", "1", "A" }, 9417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { "Æsculap", "2", "Æ" }, 9427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { "Aesthet", "2", "Æ" }, 9437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { "Berlin", "3", "B" }, 9447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { "Rilke", "19", "R" }, 9457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { "Sacher", "20", "S" }, 9467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { "Seiler", "20", "S" }, 9477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { "Sultan", "20", "S" }, 9487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { "Schiller", "21", "Sch" }, 9497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { "Steiff", "22", "St" }, 9507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert { "Thomas", "23", "T" } 9517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert }; 9527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert List<String> labels = index.getBucketLabels(); 9537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (String[] testCase : testCases) { 9547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String name = testCase[0]; 9557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int bucketIndex = Integer.valueOf(testCase[1]); 9567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String label = testCase[2]; 9577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String msg = "getBucketIndex(" + name + ")"; 9587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals(msg, bucketIndex, index.getBucketIndex(name)); 9597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert msg = "immutable " + msg; 9607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals(msg, bucketIndex, immIndex.getBucketIndex(name)); 9617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert msg = "bucket label (" + name + ")"; 9627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals(msg, label, labels.get(index.getBucketIndex(name))); 9637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert msg = "immutable " + msg; 9647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals(msg, label, immIndex.getBucket(bucketIndex).getLabel()); 9657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 9697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * With no real labels, there should be only the underflow label. 9707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 9717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void TestNoLabels() { 9727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); 9737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert AlphabeticIndex<Integer> index = new AlphabeticIndex<Integer>(coll); 9747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index.addRecord("\u897f", 0); 9757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index.addRecord("i", 0); 9767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index.addRecord("\u03B1", 0); 9777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("getBucketCount()", 1, index.getBucketCount()); // ... 9787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Bucket<Integer> bucket = index.iterator().next(); 9797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("underflow label type", LabelType.UNDERFLOW, bucket.getLabelType()); 9807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("all records in the underflow bucket", 3, bucket.size()); 9817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 9847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Test with the Bopomofo-phonetic tailoring. 9857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 9867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void TestChineseZhuyin() { 9877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert AlphabeticIndex index = new AlphabeticIndex(ULocale.forLanguageTag("zh-u-co-zhuyin")); 9887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ImmutableIndex immIndex = index.buildImmutableIndex(); 9897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("getBucketCount()", 38, immIndex.getBucketCount()); // ... ㄅ ㄆ ㄇ ㄈ ㄉ -- ㄩ ... 9907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("label 1", "ㄅ", immIndex.getBucket(1).getLabel()); 9917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("label 2", "ㄆ", immIndex.getBucket(2).getLabel()); 9927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("label 3", "ㄇ", immIndex.getBucket(3).getLabel()); 9937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("label 4", "ㄈ", immIndex.getBucket(4).getLabel()); 9947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("label 5", "ㄉ", immIndex.getBucket(5).getLabel()); 9957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void TestJapaneseKanji() { 9987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert AlphabeticIndex index = new AlphabeticIndex(ULocale.JAPANESE); 9997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert AlphabeticIndex.ImmutableIndex immIndex = index.buildImmutableIndex(); 10007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // There are no index characters for Kanji in the Japanese standard collator. 10017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // They should all go into the overflow bucket. 10027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert final int[] kanji = { 0x4E9C, 0x95C7, 0x4E00, 0x58F1 }; 10037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int overflowIndex = immIndex.getBucketCount() - 1; 10047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(int i = 0; i < kanji.length; ++i) { 10057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String msg = String.format("kanji[%d]=U+%04X in overflow bucket", i, kanji[i]); 10067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals(msg, overflowIndex, immIndex.getBucketIndex(UTF16.valueOf(kanji[i]))); 10077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 10087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 10097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void TestFrozenCollator() { 10117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Ticket #9472 10127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert RuleBasedCollator coll = (RuleBasedCollator) Collator.getInstance(new ULocale("da")); 10137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert coll.setStrength(Collator.IDENTICAL); 10147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert coll.freeze(); 10157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // The AlphabeticIndex constructor used to throw an exception 10167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // because it cloned the collator (which preserves frozenness) 10177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // and set the clone's strength to PRIMARY. 10187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert AlphabeticIndex index = new AlphabeticIndex(coll); 10197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("same strength as input Collator", 10207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Collator.IDENTICAL, index.getCollator().getStrength()); 10217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 10227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void TestChineseUnihan() { 10247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert AlphabeticIndex index = new AlphabeticIndex(new ULocale("zh-u-co-unihan")); 10257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index.setMaxLabelCount(500); // ICU 54 default is 99. 10267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert AlphabeticIndex.ImmutableIndex immIndex = index.buildImmutableIndex(); 10277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int bucketCount = immIndex.getBucketCount(); 10287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(bucketCount < 216) { 10297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // There should be at least an underflow and overflow label, 10307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // and one for each of 214 radicals, 10317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // and maybe additional labels for simplified radicals. 10327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // (ICU4C: dataerrln(), prints only a warning if the data is missing) 10337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert errln("too few buckets/labels for Chinese/unihan: " + bucketCount + 10347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert " (is zh/unihan data available?)"); 10357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return; 10367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 10377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert logln("Chinese/unihan has " + bucketCount + " buckets/labels"); 10387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 10397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // bucketIndex = radical number, adjusted for simplified radicals in lower buckets. 10407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int bucketIndex = index.getBucketIndex("\u4e5d"); 10417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("getBucketIndex(U+4E5D)", 5, bucketIndex); 10427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bucketIndex = index.getBucketIndex("\u7527"); 10437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assertEquals("getBucketIndex(U+7527)", 100, bucketIndex); 10447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 10457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert} 1046