17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/** 27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert******************************************************************************* 37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Copyright (C) 1996-2012, International Business Machines Corporation and * 47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* others. All Rights Reserved. * 57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert******************************************************************************* 67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*/ 77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// copied from the Transliterator demo 97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.dev.util; 117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.HashMap; 137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.HashSet; 147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Iterator; 157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Map; 167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Set; 177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.TreeSet; 187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UCharacter; 207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.Transliterator; 217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UTF16; 227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UnicodeSet; 237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik 
/**
 * Incrementally returns the set of all strings that case-fold to the same value.
 */
public class CaseIterator {

    // testing stuff
    // Transliterators used for debug output. Per ICU transliterator ID syntax the leading
    // [...] set is a filter limiting which characters get converted. toHex2 is the one the
    // static initializer uses when DUMP is on.
    private static Transliterator toName = Transliterator.getInstance("[:^ascii:] Any-Name");
    private static Transliterator toHex = Transliterator.getInstance("[:^ascii:] Any-Hex");
    private static Transliterator toHex2 = Transliterator.getInstance("[[^\u0021-\u007F]-[,]] Any-Hex");

    // global tables (could be precompiled)
    // fromCaseFold: keyed by a case-folded string. While the static initializer is filling it,
    // each value is a Set of the strings that fold to that key; once the pass is finished the
    // map is swapped for one whose values are String[] arrays with the same contents.
    private static Map fromCaseFold = new HashMap();
    // toCaseFold: maps a single-code-point string to its case folding, and every folding
    // result to itself.
    private static Map toCaseFold = new HashMap();
    // Longest case-folding result encountered, measured with String.length().
    private static int maxLength = 0;

    // This exception list is generated on the console by turning on the GENERATE flag,
    // which MUST be false for normal operation.
    // Once the list is generated, it is pasted in here.
    // A bit of a kludge, but this bootstrapping is the easiest way
    // to get around certain complications in the data.

    // When false (normal operation), the static initializer seeds fromCaseFold from
    // exceptionList; when true, it skips that step and runs the generation code instead.
    private static final boolean GENERATE = false;

    // When true, the static initializer prints maxLength and both tables to System.out.
    private static final boolean DUMP = false;

    // Row layout: element 0 is the case-folded key; the row as a whole (element 0 included)
    // is the set of strings equivalent under case folding. Rows are loaded into a Set, so the
    // repeated occurrence of the key inside a row is harmless.
    // Keep the rows in the generator's output format so a regenerated list can be diffed
    // against this one.
    private static String[][] exceptionList = {
        // a\N{MODIFIER LETTER RIGHT HALF RING}
        {"a\u02BE","A\u02BE","a\u02BE",},
        // ff
        {"ff","FF","Ff","fF","ff",},
        // ffi
        {"ffi","FFI","FFi","FfI","Ffi","F\uFB01","fFI","fFi","ffI","ffi","f\uFB01","\uFB00I","\uFB00i",},
        // ffl
        {"ffl","FFL","FFl","FfL","Ffl","F\uFB02","fFL","fFl","ffL","ffl","f\uFB02","\uFB00L","\uFB00l",},
        // fi
        {"fi","FI","Fi","fI","fi",},
        // fl
        {"fl","FL","Fl","fL","fl",},
        // h\N{COMBINING MACRON BELOW}
        {"h\u0331","H\u0331","h\u0331",},
        // i\N{COMBINING DOT ABOVE}
        {"i\u0307","I\u0307","i\u0307",},
        // j\N{COMBINING CARON}
        {"j\u030C","J\u030C","j\u030C",},
        // ss
        {"ss","SS","Ss","S\u017F","sS","ss","s\u017F","\u017FS","\u017Fs","\u017F\u017F",},
        // st
        {"st","ST","St","sT","st","\u017FT","\u017Ft",},
        // t\N{COMBINING DIAERESIS}
        {"t\u0308","T\u0308","t\u0308",},
        // w\N{COMBINING RING ABOVE}
        {"w\u030A","W\u030A","w\u030A",},
        // y\N{COMBINING RING ABOVE}
        {"y\u030A","Y\u030A","y\u030A",},
        // \N{MODIFIER LETTER APOSTROPHE}n
        {"\u02BCn","\u02BCN","\u02BCn",},
        // \N{GREEK SMALL LETTER ALPHA WITH TONOS}\N{GREEK SMALL LETTER IOTA}
        {"\u03AC\u03B9","\u0386\u0345","\u0386\u0399","\u0386\u03B9","\u0386\u1FBE","\u03AC\u0345","\u03AC\u0399","\u03AC\u03B9","\u03AC\u1FBE",},
        // \N{GREEK SMALL LETTER ETA WITH TONOS}\N{GREEK SMALL LETTER IOTA}
        {"\u03AE\u03B9","\u0389\u0345","\u0389\u0399","\u0389\u03B9","\u0389\u1FBE","\u03AE\u0345","\u03AE\u0399","\u03AE\u03B9","\u03AE\u1FBE",},
        // \N{GREEK SMALL LETTER ALPHA}\N{COMBINING GREEK PERISPOMENI}
        {"\u03B1\u0342","\u0391\u0342","\u03B1\u0342",},
        // \N{GREEK SMALL LETTER ALPHA}\N{COMBINING GREEK PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
        {"\u03B1\u0342\u03B9","\u0391\u0342\u0345","\u0391\u0342\u0399","\u0391\u0342\u03B9","\u0391\u0342\u1FBE",
            "\u03B1\u0342\u0345","\u03B1\u0342\u0399","\u03B1\u0342\u03B9","\u03B1\u0342\u1FBE","\u1FB6\u0345",
            "\u1FB6\u0399","\u1FB6\u03B9","\u1FB6\u1FBE",},
        // \N{GREEK SMALL LETTER ALPHA}\N{GREEK SMALL LETTER IOTA}
        {"\u03B1\u03B9","\u0391\u0345","\u0391\u0399","\u0391\u03B9","\u0391\u1FBE","\u03B1\u0345","\u03B1\u0399","\u03B1\u03B9","\u03B1\u1FBE",},
        // \N{GREEK SMALL LETTER ETA}\N{COMBINING GREEK PERISPOMENI}
        {"\u03B7\u0342","\u0397\u0342","\u03B7\u0342",},
        // \N{GREEK SMALL LETTER ETA}\N{COMBINING GREEK PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
        {"\u03B7\u0342\u03B9","\u0397\u0342\u0345","\u0397\u0342\u0399","\u0397\u0342\u03B9","\u0397\u0342\u1FBE",
            "\u03B7\u0342\u0345","\u03B7\u0342\u0399","\u03B7\u0342\u03B9","\u03B7\u0342\u1FBE","\u1FC6\u0345","\u1FC6\u0399",
            "\u1FC6\u03B9","\u1FC6\u1FBE",},
        // \N{GREEK SMALL LETTER ETA}\N{GREEK SMALL LETTER IOTA}
        {"\u03B7\u03B9","\u0397\u0345","\u0397\u0399","\u0397\u03B9","\u0397\u1FBE","\u03B7\u0345","\u03B7\u0399","\u03B7\u03B9","\u03B7\u1FBE",},
        // \N{GREEK SMALL LETTER IOTA}\N{COMBINING DIAERESIS}\N{COMBINING GRAVE ACCENT}
        {"\u03B9\u0308\u0300","\u0345\u0308\u0300","\u0399\u0308\u0300","\u03B9\u0308\u0300","\u1FBE\u0308\u0300",},
        // \N{GREEK SMALL LETTER IOTA}\N{COMBINING DIAERESIS}\N{COMBINING ACUTE ACCENT}
        {"\u03B9\u0308\u0301","\u0345\u0308\u0301","\u0399\u0308\u0301","\u03B9\u0308\u0301","\u1FBE\u0308\u0301",},
        // \N{GREEK SMALL LETTER IOTA}\N{COMBINING DIAERESIS}\N{COMBINING GREEK PERISPOMENI}
        {"\u03B9\u0308\u0342","\u0345\u0308\u0342","\u0399\u0308\u0342","\u03B9\u0308\u0342","\u1FBE\u0308\u0342",},
        // \N{GREEK SMALL LETTER IOTA}\N{COMBINING GREEK PERISPOMENI}
        {"\u03B9\u0342","\u0345\u0342","\u0399\u0342","\u03B9\u0342","\u1FBE\u0342",},
        // \N{GREEK SMALL LETTER RHO}\N{COMBINING COMMA ABOVE}
        {"\u03C1\u0313","\u03A1\u0313","\u03C1\u0313","\u03F1\u0313",},
        // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING DIAERESIS}\N{COMBINING GRAVE ACCENT}
        {"\u03C5\u0308\u0300","\u03A5\u0308\u0300","\u03C5\u0308\u0300",},
        // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING DIAERESIS}\N{COMBINING ACUTE ACCENT}
        {"\u03C5\u0308\u0301","\u03A5\u0308\u0301","\u03C5\u0308\u0301",},
        // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING DIAERESIS}\N{COMBINING GREEK PERISPOMENI}
        {"\u03C5\u0308\u0342","\u03A5\u0308\u0342","\u03C5\u0308\u0342",},
        // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING COMMA ABOVE}
        {"\u03C5\u0313","\u03A5\u0313","\u03C5\u0313",},
        // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING COMMA ABOVE}\N{COMBINING GRAVE ACCENT}
        {"\u03C5\u0313\u0300","\u03A5\u0313\u0300","\u03C5\u0313\u0300","\u1F50\u0300",},
        // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING COMMA ABOVE}\N{COMBINING ACUTE ACCENT}
        {"\u03C5\u0313\u0301","\u03A5\u0313\u0301","\u03C5\u0313\u0301","\u1F50\u0301",},
        // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING COMMA ABOVE}\N{COMBINING GREEK PERISPOMENI}
        {"\u03C5\u0313\u0342","\u03A5\u0313\u0342","\u03C5\u0313\u0342","\u1F50\u0342",},
        // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING GREEK PERISPOMENI}
        {"\u03C5\u0342","\u03A5\u0342","\u03C5\u0342",},
        // \N{GREEK SMALL LETTER OMEGA}\N{COMBINING GREEK PERISPOMENI}
        {"\u03C9\u0342","\u03A9\u0342","\u03C9\u0342","\u2126\u0342",},
        // \N{GREEK SMALL LETTER OMEGA}\N{COMBINING GREEK PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
        {"\u03C9\u0342\u03B9","\u03A9\u0342\u0345","\u03A9\u0342\u0399","\u03A9\u0342\u03B9","\u03A9\u0342\u1FBE","\u03C9\u0342\u0345","\u03C9\u0342\u0399","\u03C9\u0342\u03B9","\u03C9\u0342\u1FBE","\u1FF6\u0345",
            "\u1FF6\u0399","\u1FF6\u03B9","\u1FF6\u1FBE","\u2126\u0342\u0345","\u2126\u0342\u0399","\u2126\u0342\u03B9","\u2126\u0342\u1FBE",},
        // \N{GREEK SMALL LETTER OMEGA}\N{GREEK SMALL LETTER IOTA}
        {"\u03C9\u03B9","\u03A9\u0345","\u03A9\u0399","\u03A9\u03B9","\u03A9\u1FBE","\u03C9\u0345","\u03C9\u0399","\u03C9\u03B9","\u03C9\u1FBE","\u2126\u0345","\u2126\u0399","\u2126\u03B9","\u2126\u1FBE",},
        // \N{GREEK SMALL LETTER OMEGA WITH TONOS}\N{GREEK SMALL LETTER IOTA}
        {"\u03CE\u03B9","\u038F\u0345","\u038F\u0399","\u038F\u03B9","\u038F\u1FBE","\u03CE\u0345","\u03CE\u0399","\u03CE\u03B9","\u03CE\u1FBE",},
        // \N{ARMENIAN SMALL LETTER ECH}\N{ARMENIAN SMALL LETTER YIWN}
        {"\u0565\u0582","\u0535\u0552","\u0535\u0582","\u0565\u0552","\u0565\u0582",},
        // \N{ARMENIAN SMALL LETTER MEN}\N{ARMENIAN SMALL LETTER ECH}
        {"\u0574\u0565","\u0544\u0535","\u0544\u0565","\u0574\u0535","\u0574\u0565",},
        // \N{ARMENIAN SMALL LETTER MEN}\N{ARMENIAN SMALL LETTER INI}
        {"\u0574\u056B","\u0544\u053B","\u0544\u056B","\u0574\u053B","\u0574\u056B",},
        // \N{ARMENIAN SMALL LETTER MEN}\N{ARMENIAN SMALL LETTER XEH}
        {"\u0574\u056D","\u0544\u053D","\u0544\u056D","\u0574\u053D","\u0574\u056D",},
        // \N{ARMENIAN SMALL LETTER MEN}\N{ARMENIAN SMALL LETTER NOW}
        {"\u0574\u0576","\u0544\u0546","\u0544\u0576","\u0574\u0546","\u0574\u0576",},
        // \N{ARMENIAN SMALL LETTER VEW}\N{ARMENIAN SMALL LETTER NOW}
        {"\u057E\u0576","\u054E\u0546","\u054E\u0576","\u057E\u0546","\u057E\u0576",},
        // \N{GREEK SMALL LETTER ALPHA WITH PSILI}\N{GREEK SMALL LETTER IOTA}
        {"\u1F00\u03B9","\u1F00\u0345","\u1F00\u0399","\u1F00\u03B9","\u1F00\u1FBE","\u1F08\u0345","\u1F08\u0399","\u1F08\u03B9","\u1F08\u1FBE",},
        // \N{GREEK SMALL LETTER ALPHA WITH DASIA}\N{GREEK SMALL LETTER IOTA}
        {"\u1F01\u03B9","\u1F01\u0345","\u1F01\u0399","\u1F01\u03B9","\u1F01\u1FBE","\u1F09\u0345","\u1F09\u0399","\u1F09\u03B9","\u1F09\u1FBE",},
        // \N{GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA}\N{GREEK SMALL LETTER IOTA}
        {"\u1F02\u03B9","\u1F02\u0345","\u1F02\u0399","\u1F02\u03B9","\u1F02\u1FBE","\u1F0A\u0345","\u1F0A\u0399","\u1F0A\u03B9","\u1F0A\u1FBE",},
        // \N{GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA}\N{GREEK SMALL LETTER IOTA}
        {"\u1F03\u03B9","\u1F03\u0345","\u1F03\u0399","\u1F03\u03B9","\u1F03\u1FBE","\u1F0B\u0345","\u1F0B\u0399","\u1F0B\u03B9","\u1F0B\u1FBE",},
        // \N{GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA}\N{GREEK SMALL LETTER IOTA}
        {"\u1F04\u03B9","\u1F04\u0345","\u1F04\u0399","\u1F04\u03B9","\u1F04\u1FBE","\u1F0C\u0345","\u1F0C\u0399","\u1F0C\u03B9","\u1F0C\u1FBE",},
        // \N{GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA}\N{GREEK SMALL LETTER IOTA}
        {"\u1F05\u03B9","\u1F05\u0345","\u1F05\u0399","\u1F05\u03B9","\u1F05\u1FBE","\u1F0D\u0345","\u1F0D\u0399","\u1F0D\u03B9","\u1F0D\u1FBE",},
        // \N{GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
        {"\u1F06\u03B9","\u1F06\u0345","\u1F06\u0399","\u1F06\u03B9","\u1F06\u1FBE","\u1F0E\u0345","\u1F0E\u0399","\u1F0E\u03B9","\u1F0E\u1FBE",},
        // \N{GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
        {"\u1F07\u03B9","\u1F07\u0345","\u1F07\u0399","\u1F07\u03B9","\u1F07\u1FBE","\u1F0F\u0345","\u1F0F\u0399","\u1F0F\u03B9","\u1F0F\u1FBE",},
        // \N{GREEK SMALL LETTER ETA WITH PSILI}\N{GREEK SMALL LETTER IOTA}
        {"\u1F20\u03B9","\u1F20\u0345","\u1F20\u0399","\u1F20\u03B9","\u1F20\u1FBE","\u1F28\u0345","\u1F28\u0399","\u1F28\u03B9","\u1F28\u1FBE",},
        // \N{GREEK SMALL LETTER ETA WITH DASIA}\N{GREEK SMALL LETTER IOTA}
        {"\u1F21\u03B9","\u1F21\u0345","\u1F21\u0399","\u1F21\u03B9","\u1F21\u1FBE","\u1F29\u0345","\u1F29\u0399","\u1F29\u03B9","\u1F29\u1FBE",},
        // \N{GREEK SMALL LETTER ETA WITH PSILI AND VARIA}\N{GREEK SMALL LETTER IOTA}
        {"\u1F22\u03B9","\u1F22\u0345","\u1F22\u0399","\u1F22\u03B9","\u1F22\u1FBE","\u1F2A\u0345","\u1F2A\u0399","\u1F2A\u03B9","\u1F2A\u1FBE",},
        // \N{GREEK SMALL LETTER ETA WITH DASIA AND VARIA}\N{GREEK SMALL LETTER IOTA}
        {"\u1F23\u03B9","\u1F23\u0345","\u1F23\u0399","\u1F23\u03B9","\u1F23\u1FBE","\u1F2B\u0345","\u1F2B\u0399","\u1F2B\u03B9","\u1F2B\u1FBE",},
        // \N{GREEK SMALL LETTER ETA WITH PSILI AND OXIA}\N{GREEK SMALL LETTER IOTA}
        {"\u1F24\u03B9","\u1F24\u0345","\u1F24\u0399","\u1F24\u03B9","\u1F24\u1FBE","\u1F2C\u0345","\u1F2C\u0399","\u1F2C\u03B9","\u1F2C\u1FBE",},
        // \N{GREEK SMALL LETTER ETA WITH DASIA AND OXIA}\N{GREEK SMALL LETTER IOTA}
        {"\u1F25\u03B9","\u1F25\u0345","\u1F25\u0399","\u1F25\u03B9","\u1F25\u1FBE","\u1F2D\u0345","\u1F2D\u0399","\u1F2D\u03B9","\u1F2D\u1FBE",},
        // \N{GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
        {"\u1F26\u03B9","\u1F26\u0345","\u1F26\u0399","\u1F26\u03B9","\u1F26\u1FBE","\u1F2E\u0345","\u1F2E\u0399","\u1F2E\u03B9","\u1F2E\u1FBE",},
        // \N{GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
        {"\u1F27\u03B9","\u1F27\u0345","\u1F27\u0399","\u1F27\u03B9","\u1F27\u1FBE","\u1F2F\u0345","\u1F2F\u0399","\u1F2F\u03B9","\u1F2F\u1FBE",},
        // \N{GREEK SMALL LETTER OMEGA WITH PSILI}\N{GREEK SMALL LETTER IOTA}
        {"\u1F60\u03B9","\u1F60\u0345","\u1F60\u0399","\u1F60\u03B9","\u1F60\u1FBE","\u1F68\u0345","\u1F68\u0399","\u1F68\u03B9","\u1F68\u1FBE",},
        // \N{GREEK SMALL LETTER OMEGA WITH DASIA}\N{GREEK SMALL LETTER IOTA}
        {"\u1F61\u03B9","\u1F61\u0345","\u1F61\u0399","\u1F61\u03B9","\u1F61\u1FBE","\u1F69\u0345","\u1F69\u0399","\u1F69\u03B9","\u1F69\u1FBE",},
        // \N{GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA}\N{GREEK SMALL LETTER IOTA}
        {"\u1F62\u03B9","\u1F62\u0345","\u1F62\u0399","\u1F62\u03B9","\u1F62\u1FBE","\u1F6A\u0345","\u1F6A\u0399","\u1F6A\u03B9","\u1F6A\u1FBE",},
        // \N{GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA}\N{GREEK SMALL LETTER IOTA}
        {"\u1F63\u03B9","\u1F63\u0345","\u1F63\u0399","\u1F63\u03B9","\u1F63\u1FBE","\u1F6B\u0345","\u1F6B\u0399","\u1F6B\u03B9","\u1F6B\u1FBE",},
        // \N{GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA}\N{GREEK SMALL LETTER IOTA}
        {"\u1F64\u03B9","\u1F64\u0345","\u1F64\u0399","\u1F64\u03B9","\u1F64\u1FBE","\u1F6C\u0345","\u1F6C\u0399","\u1F6C\u03B9","\u1F6C\u1FBE",},
        // \N{GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA}\N{GREEK SMALL LETTER IOTA}
        {"\u1F65\u03B9","\u1F65\u0345","\u1F65\u0399","\u1F65\u03B9","\u1F65\u1FBE","\u1F6D\u0345","\u1F6D\u0399","\u1F6D\u03B9","\u1F6D\u1FBE",},
        // \N{GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
        {"\u1F66\u03B9","\u1F66\u0345","\u1F66\u0399","\u1F66\u03B9","\u1F66\u1FBE","\u1F6E\u0345","\u1F6E\u0399","\u1F6E\u03B9","\u1F6E\u1FBE",},
        // \N{GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
        {"\u1F67\u03B9","\u1F67\u0345","\u1F67\u0399","\u1F67\u03B9","\u1F67\u1FBE","\u1F6F\u0345","\u1F6F\u0399","\u1F6F\u03B9","\u1F6F\u1FBE",},
        // \N{GREEK SMALL LETTER ALPHA WITH VARIA}\N{GREEK SMALL LETTER IOTA}
        {"\u1F70\u03B9","\u1F70\u0345","\u1F70\u0399","\u1F70\u03B9","\u1F70\u1FBE","\u1FBA\u0345","\u1FBA\u0399","\u1FBA\u03B9","\u1FBA\u1FBE",},
        // \N{GREEK SMALL LETTER ETA WITH VARIA}\N{GREEK SMALL LETTER IOTA}
        {"\u1F74\u03B9","\u1F74\u0345","\u1F74\u0399","\u1F74\u03B9","\u1F74\u1FBE","\u1FCA\u0345","\u1FCA\u0399","\u1FCA\u03B9","\u1FCA\u1FBE",},
        // \N{GREEK SMALL LETTER OMEGA WITH VARIA}\N{GREEK SMALL LETTER IOTA}
        {"\u1F7C\u03B9","\u1F7C\u0345","\u1F7C\u0399","\u1F7C\u03B9","\u1F7C\u1FBE","\u1FFA\u0345","\u1FFA\u0399","\u1FFA\u03B9","\u1FFA\u1FBE",},
    };
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // this initializes the data used to generated the case-equivalents 2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static { 2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Gather up the exceptions in a form we can use 2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (!GENERATE) { 2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = 0; i < exceptionList.length; ++i) { 2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String[] exception = exceptionList[i]; 2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Set s = new HashSet(); 2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // there has to be some method to do the following, but I can't find it in the collections 2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int j = 0; j < exception.length; ++j) { 2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert s.add(exception[j]); 2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fromCaseFold.put(exception[0], s); 2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // walk through all the characters, and at every case fold result, 2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // put a set of all the characters that map to that result 2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean defaultmapping = true; // false for turkish 
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = 0; i <= 0x10FFFF; ++i) { 2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int cat = UCharacter.getType(i); 2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (cat == Character.UNASSIGNED || cat == Character.PRIVATE_USE) continue; 2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String cp = UTF16.valueOf(i); 2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String mapped = UCharacter.foldCase(cp, defaultmapping); 2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (mapped.equals(cp)) continue; 2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (maxLength < mapped.length()) maxLength = mapped.length(); 2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // at this point, have different case folding 2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Set s = (Set) fromCaseFold.get(mapped); 2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (s == null) { 2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert s = new HashSet(); 2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert s.add(mapped); // add the case fold result itself 2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fromCaseFold.put(mapped, s); 2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert s.add(cp); 2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert toCaseFold.put(cp, mapped); 2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert toCaseFold.put(mapped, mapped); // add mapping to self 2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Emit the final data 2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (DUMP) { 2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert System.out.println("maxLength = " + maxLength); 2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert System.out.println("\nfromCaseFold:"); 2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Iterator it = fromCaseFold.keySet().iterator(); 2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (it.hasNext()) { 2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Object key = it.next(); 2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert System.out.print(" " + toHex2.transliterate((String)key) + ": "); 2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Set s = (Set) fromCaseFold.get(key); 2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Iterator it2 = s.iterator(); 2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean first = true; 2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (it2.hasNext()) { 2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (first) { 2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert first = false; 2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert System.out.print(", "); 2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert System.out.print(toHex2.transliterate((String)it2.next())); 2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert System.out.println(""); 2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert System.out.println("\ntoCaseFold:"); 2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert it = toCaseFold.keySet().iterator(); 2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (it.hasNext()) { 2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String key = (String) it.next(); 2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String value = (String) toCaseFold.get(key); 2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert System.out.println(" " + toHex2.transliterate(key) + ": " + toHex2.transliterate(value)); 2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Now convert all those sets into linear arrays 2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // We can't do this in place in Java, so make a temporary target array 2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Note: This could be transformed into a single array, with offsets into it. 2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Might be best choice in C. 
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Map fromCaseFold2 = new HashMap(); 2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Iterator it = fromCaseFold.keySet().iterator(); 2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (it.hasNext()) { 2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Object key = it.next(); 2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Set s = (Set) fromCaseFold.get(key); 2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String[] temp = new String[s.size()]; 2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert s.toArray(temp); 2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fromCaseFold2.put(key, temp); 2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fromCaseFold = fromCaseFold2; 2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // We have processed everything, so the iterator will now work 3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // The following is normally OFF. 3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // It is here to generate (under the GENERATE flag) the static exception list. 3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // It must be at the very end of initialization, so that the iterator is functional. 
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // (easiest to do it that way) 3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (GENERATE) { 3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // first get small set of items that have multiple characters 3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Set multichars = new TreeSet(); 3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert it = fromCaseFold.keySet().iterator(); 3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (it.hasNext()) { 3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String key = (String) it.next(); 3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (UTF16.countCodePoint(key) < 2) continue; 3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert multichars.add(key); 3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // now we will go through each of them. 3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CaseIterator ci = new CaseIterator(); 3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert it = multichars.iterator(); 3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (it.hasNext()) { 3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String key = (String) it.next(); 3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // here is a nasty complication. Take 'ffi' ligature. 
We 3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // can't just close it, since we would miss the combination 3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // that includes the 'fi' => "fi" ligature 3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // so first do a pass through, and add substring combinations 3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // we call this a 'partial closure' 3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Set partialClosure = new TreeSet(); 3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert partialClosure.add(key); 3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (UTF16.countCodePoint(key) > 2) { 3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Iterator multiIt2 = multichars.iterator(); 3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (multiIt2.hasNext()) { 3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String otherKey = (String) multiIt2.next(); 3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (otherKey.length() >= key.length()) continue; 3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int pos = -1; 3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (true) { 3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // The following is not completely general 3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // but works for the actual cased stuff, 3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // and should work for future characters, since we won't have 3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // more ligatures & other oddities. 
3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos = key.indexOf(otherKey, pos+1); 3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (pos < 0) break; 3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int endPos = pos + otherKey.length(); 3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // we know we have a proper substring, 3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // so get the combinations 3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String[] choices = (String[]) fromCaseFold.get(otherKey); 3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int ii = 0; ii < choices.length; ++ii) { 3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String patchwork = key.substring(0, pos) 3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert + choices[ii] 3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert + key.substring(endPos); 3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert partialClosure.add(patchwork); 3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // now, for each thing in the partial closure, get its 3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // case closure and add it to the final result. 
3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Set closure = new TreeSet(); // this will be the real closure 3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Iterator partialIt = partialClosure.iterator(); 3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (partialIt.hasNext()) { 3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String key2 = (String) partialIt.next(); 3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ci.reset(key2); 3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (String temp = ci.next(); temp != null; temp = ci.next()) { 3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert closure.add(temp); 3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // form closure 3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /*String[] choices = (String[]) fromCaseFold.get(key2); 3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = 0; i < choices.length; ++i) { 3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ci.reset(choices[i]); 3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String temp; 3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (null != (temp = ci.next())) { 3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert closure.add(temp); 3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // print it out, so that it can be cut and pasted back into this document. 
3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Iterator it2 = closure.iterator(); 3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert System.out.println("\t// " + toName.transliterate(key)); 3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert System.out.print("\t{\"" + toHex.transliterate(key) + "\","); 3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (it2.hasNext()) { 3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String item = (String)it2.next(); 3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert System.out.print("\"" + toHex.transliterate(item) + "\","); 3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert System.out.println("},"); 3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // ============ PRIVATE CLASS DATA ============ 3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // pieces that we will put together 4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // is not changed during iteration 4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int count = 0; 4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private String[][] variants; 4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // state information, changes during iteration 4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private boolean done = false; 4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int[] counts; 4087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 
4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // internal buffer for efficiency 4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private StringBuffer nextBuffer = new StringBuffer(); 4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // ======================== 4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Reset to different source. Once reset, the iteration starts from the beginning. 4167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param source The string to get case variants for 4177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void reset(String source) { 4197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // allocate arrays to store pieces 4217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // using length might be slightly too long, but we don't care much 4227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert counts = new int[source.length()]; 4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert variants = new String[source.length()][]; 4257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // walk through the source, and break up into pieces 4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // each piece becomes an array of equivalent values 4287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // TODO: could optimized this later to coalesce all single string pieces 4297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String piece = null; 
4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert count = 0; 4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = 0; i < source.length(); i += piece.length()) { 4337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // find *longest* matching piece 4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String caseFold = null; 4367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (GENERATE) { 4387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // do exactly one CP 4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert piece = UTF16.valueOf(source, i); 4407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert caseFold = (String) toCaseFold.get(piece); 4417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 4427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int max = i + maxLength; 4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (max > source.length()) max = source.length(); 4447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int j = max; j > i; --j) { 4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert piece = source.substring(i, j); 4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert caseFold = (String) toCaseFold.get(piece); 4477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (caseFold != null) break; 4487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // if we fail, pick one code point 4527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (caseFold == null) { 4537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert piece = UTF16.valueOf(source, i); 4547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert variants[count++] = new 
String[] {piece}; // single item string 4557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 4567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert variants[count++] = (String[])fromCaseFold.get(caseFold); 4577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert reset(); 4607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Restart the iteration from the beginning, but with same source 4647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void reset() { 4667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert done = false; 4677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = 0; i < count; ++i) { 4687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert counts[i] = 0; 4697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Iterates through the case variants. 4747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return next case variant. Each variant will case-fold to the same value as the source will. 4757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * When the iteration is done, null is returned. 
4767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public String next() { 4787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (done) return null; 4807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int i; 4817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // TODO Optimize so we keep the piece before and after the current position 4837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // so we don't have so much concatenation 4847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // get the result, a concatenation 4867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert nextBuffer.setLength(0); 4887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (i = 0; i < count; ++i) { 4897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert nextBuffer.append(variants[i][counts[i]]); 4907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // find the next right set of pieces to concatenate 4937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (i = count-1; i >= 0; --i) { 4957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert counts[i]++; 4967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (counts[i] < variants[i].length) break; 4977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert counts[i] = 0; 4987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // if we go too far, bail 
5017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (i < 0) { 5037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert done = true; 5047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return nextBuffer.toString(); 5077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 5117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Temporary test, just to see how the stuff works. 5127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 5137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static public void main(String[] args) { 5147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String[] testCases = {"fiss", "h\u03a3"}; 5157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CaseIterator ci = new CaseIterator(); 5167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = 0; i < testCases.length; ++i) { 5187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String item = testCases[i]; 5197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert System.out.println(); 5207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert System.out.println("Testing: " + toName.transliterate(item)); 5217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert System.out.println(); 5227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ci.reset(item); 5237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int count = 0; 5247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (String temp = ci.next(); temp != null; temp = ci.next()) { 5257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 
System.out.println(toName.transliterate(temp)); 5267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert count++; 5277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert System.out.println("Total: " + count); 5297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // generate a list of all caseless characters -- characters whose 5327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // case closure is themselves. 5337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnicodeSet caseless = new UnicodeSet(); 5357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = 0; i <= 0x10FFFF; ++i) { 5377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String cp = UTF16.valueOf(i); 5387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ci.reset(cp); 5397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int count = 0; 5407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert String fold = null; 5417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (String temp = ci.next(); temp != null; temp = ci.next()) { 5427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert fold = temp; 5437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (++count > 1) break; 5447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (count==1 && fold.equals(cp)) { 5467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert caseless.add(i); 5477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 
System.out.println("caseless = " + caseless.toPattern(true)); 5517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnicodeSet not_lc = new UnicodeSet("[:^lc:]"); 5537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnicodeSet a = new UnicodeSet(); 5557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert a.set(not_lc); 5567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert a.removeAll(caseless); 5577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert System.out.println("[:^lc:] - caseless = " + a.toPattern(true)); 5587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert a.set(caseless); 5607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert a.removeAll(not_lc); 5617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert System.out.println("caseless - [:^lc:] = " + a.toPattern(true)); 5627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert} 564