17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/**
27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*******************************************************************************
37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Copyright (C) 1996-2012, International Business Machines Corporation and    *
47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* others. All Rights Reserved.                                                *
57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*******************************************************************************
67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*/
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// copied from the Transliterator demo
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.dev.util;
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.HashMap;
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.HashSet;
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Iterator;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Map;
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Set;
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.TreeSet;
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UCharacter;
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.Transliterator;
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UTF16;
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UnicodeSet;
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/**
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Incrementally returns the set of all strings that case-fold to the same value.
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic class CaseIterator {
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // testing stuff: transliterators for rendering text in debug output
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static Transliterator toName = Transliterator.getInstance("[:^ascii:] Any-Name"); // presumably non-ASCII chars -> character names (per the ID) - TODO confirm
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static Transliterator toHex = Transliterator.getInstance("[:^ascii:] Any-Hex"); // presumably non-ASCII chars -> hex escapes (per the ID) - TODO confirm
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static Transliterator toHex2 = Transliterator.getInstance("[[^\u0021-\u007F]-[,]] Any-Hex"); // applied to keys and values when the static initializer dumps its tables (DUMP)
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // global tables (could be precompiled); populated by the static initializer below
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static Map fromCaseFold = new HashMap(); // String case-fold result -> Set of Strings that fold to it (the result itself included)
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static Map toCaseFold = new HashMap(); // String code point -> its case-fold String; each fold result also maps to itself
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int maxLength = 0; // longest fold result (String.length()) among code points whose fold differs from themselves
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // This exception list is generated on the console by turning on the GENERATE flag,
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // which MUST be false for normal operation.
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Once the list is generated, it is pasted in here.
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // A bit of a kludge, but this bootstrapping is the easiest way
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // to get around certain complications in the data.
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final boolean GENERATE = false; // when false, the static initializer preloads exceptionList into fromCaseFold
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final boolean DUMP = false; // when true, the static initializer prints maxLength and the fold tables to System.out
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static String[][] exceptionList = {
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // a\N{MODIFIER LETTER RIGHT HALF RING}
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"a\u02BE","A\u02BE","a\u02BE",},
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // ff
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"ff","FF","Ff","fF","ff",},
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // ffi
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"ffi","FFI","FFi","FfI","Ffi","F\uFB01","fFI","fFi","ffI","ffi","f\uFB01","\uFB00I","\uFB00i",},
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // ffl
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"ffl","FFL","FFl","FfL","Ffl","F\uFB02","fFL","fFl","ffL","ffl","f\uFB02","\uFB00L","\uFB00l",},
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // fi
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"fi","FI","Fi","fI","fi",},
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // fl
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"fl","FL","Fl","fL","fl",},
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // h\N{COMBINING MACRON BELOW}
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"h\u0331","H\u0331","h\u0331",},
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // i\N{COMBINING DOT ABOVE}
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"i\u0307","I\u0307","i\u0307",},
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // j\N{COMBINING CARON}
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"j\u030C","J\u030C","j\u030C",},
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // ss
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"ss","SS","Ss","S\u017F","sS","ss","s\u017F","\u017FS","\u017Fs","\u017F\u017F",},
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // st
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"st","ST","St","sT","st","\u017FT","\u017Ft",},
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // t\N{COMBINING DIAERESIS}
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"t\u0308","T\u0308","t\u0308",},
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // w\N{COMBINING RING ABOVE}
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"w\u030A","W\u030A","w\u030A",},
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // y\N{COMBINING RING ABOVE}
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"y\u030A","Y\u030A","y\u030A",},
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{MODIFIER LETTER APOSTROPHE}n
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u02BCn","\u02BCN","\u02BCn",},
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ALPHA WITH TONOS}\N{GREEK SMALL LETTER IOTA}
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u03AC\u03B9","\u0386\u0345","\u0386\u0399","\u0386\u03B9","\u0386\u1FBE","\u03AC\u0345","\u03AC\u0399","\u03AC\u03B9","\u03AC\u1FBE",},
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ETA WITH TONOS}\N{GREEK SMALL LETTER IOTA}
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u03AE\u03B9","\u0389\u0345","\u0389\u0399","\u0389\u03B9","\u0389\u1FBE","\u03AE\u0345","\u03AE\u0399","\u03AE\u03B9","\u03AE\u1FBE",},
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ALPHA}\N{COMBINING GREEK PERISPOMENI}
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u03B1\u0342","\u0391\u0342","\u03B1\u0342",},
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ALPHA}\N{COMBINING GREEK PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u03B1\u0342\u03B9","\u0391\u0342\u0345","\u0391\u0342\u0399","\u0391\u0342\u03B9","\u0391\u0342\u1FBE",
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u03B1\u0342\u0345","\u03B1\u0342\u0399","\u03B1\u0342\u03B9","\u03B1\u0342\u1FBE","\u1FB6\u0345",
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u1FB6\u0399","\u1FB6\u03B9","\u1FB6\u1FBE",},
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ALPHA}\N{GREEK SMALL LETTER IOTA}
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u03B1\u03B9","\u0391\u0345","\u0391\u0399","\u0391\u03B9","\u0391\u1FBE","\u03B1\u0345","\u03B1\u0399","\u03B1\u03B9","\u03B1\u1FBE",},
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ETA}\N{COMBINING GREEK PERISPOMENI}
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u03B7\u0342","\u0397\u0342","\u03B7\u0342",},
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ETA}\N{COMBINING GREEK PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u03B7\u0342\u03B9","\u0397\u0342\u0345","\u0397\u0342\u0399","\u0397\u0342\u03B9","\u0397\u0342\u1FBE",
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u03B7\u0342\u0345","\u03B7\u0342\u0399","\u03B7\u0342\u03B9","\u03B7\u0342\u1FBE","\u1FC6\u0345","\u1FC6\u0399",
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u1FC6\u03B9","\u1FC6\u1FBE",},
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ETA}\N{GREEK SMALL LETTER IOTA}
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u03B7\u03B9","\u0397\u0345","\u0397\u0399","\u0397\u03B9","\u0397\u1FBE","\u03B7\u0345","\u03B7\u0399","\u03B7\u03B9","\u03B7\u1FBE",},
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER IOTA}\N{COMBINING DIAERESIS}\N{COMBINING GRAVE ACCENT}
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u03B9\u0308\u0300","\u0345\u0308\u0300","\u0399\u0308\u0300","\u03B9\u0308\u0300","\u1FBE\u0308\u0300",},
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER IOTA}\N{COMBINING DIAERESIS}\N{COMBINING ACUTE ACCENT}
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u03B9\u0308\u0301","\u0345\u0308\u0301","\u0399\u0308\u0301","\u03B9\u0308\u0301","\u1FBE\u0308\u0301",},
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER IOTA}\N{COMBINING DIAERESIS}\N{COMBINING GREEK PERISPOMENI}
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u03B9\u0308\u0342","\u0345\u0308\u0342","\u0399\u0308\u0342","\u03B9\u0308\u0342","\u1FBE\u0308\u0342",},
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER IOTA}\N{COMBINING GREEK PERISPOMENI}
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u03B9\u0342","\u0345\u0342","\u0399\u0342","\u03B9\u0342","\u1FBE\u0342",},
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER RHO}\N{COMBINING COMMA ABOVE}
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u03C1\u0313","\u03A1\u0313","\u03C1\u0313","\u03F1\u0313",},
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING DIAERESIS}\N{COMBINING GRAVE ACCENT}
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u03C5\u0308\u0300","\u03A5\u0308\u0300","\u03C5\u0308\u0300",},
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING DIAERESIS}\N{COMBINING ACUTE ACCENT}
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u03C5\u0308\u0301","\u03A5\u0308\u0301","\u03C5\u0308\u0301",},
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING DIAERESIS}\N{COMBINING GREEK PERISPOMENI}
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u03C5\u0308\u0342","\u03A5\u0308\u0342","\u03C5\u0308\u0342",},
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING COMMA ABOVE}
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u03C5\u0313","\u03A5\u0313","\u03C5\u0313",},
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING COMMA ABOVE}\N{COMBINING GRAVE ACCENT}
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u03C5\u0313\u0300","\u03A5\u0313\u0300","\u03C5\u0313\u0300","\u1F50\u0300",},
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING COMMA ABOVE}\N{COMBINING ACUTE ACCENT}
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u03C5\u0313\u0301","\u03A5\u0313\u0301","\u03C5\u0313\u0301","\u1F50\u0301",},
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING COMMA ABOVE}\N{COMBINING GREEK PERISPOMENI}
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u03C5\u0313\u0342","\u03A5\u0313\u0342","\u03C5\u0313\u0342","\u1F50\u0342",},
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER UPSILON}\N{COMBINING GREEK PERISPOMENI}
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u03C5\u0342","\u03A5\u0342","\u03C5\u0342",},
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER OMEGA}\N{COMBINING GREEK PERISPOMENI}
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u03C9\u0342","\u03A9\u0342","\u03C9\u0342","\u2126\u0342",},
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER OMEGA}\N{COMBINING GREEK PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u03C9\u0342\u03B9","\u03A9\u0342\u0345","\u03A9\u0342\u0399","\u03A9\u0342\u03B9","\u03A9\u0342\u1FBE","\u03C9\u0342\u0345","\u03C9\u0342\u0399","\u03C9\u0342\u03B9","\u03C9\u0342\u1FBE","\u1FF6\u0345",
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            "\u1FF6\u0399","\u1FF6\u03B9","\u1FF6\u1FBE","\u2126\u0342\u0345","\u2126\u0342\u0399","\u2126\u0342\u03B9","\u2126\u0342\u1FBE",},
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER OMEGA}\N{GREEK SMALL LETTER IOTA}
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u03C9\u03B9","\u03A9\u0345","\u03A9\u0399","\u03A9\u03B9","\u03A9\u1FBE","\u03C9\u0345","\u03C9\u0399","\u03C9\u03B9","\u03C9\u1FBE","\u2126\u0345","\u2126\u0399","\u2126\u03B9","\u2126\u1FBE",},
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER OMEGA WITH TONOS}\N{GREEK SMALL LETTER IOTA}
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u03CE\u03B9","\u038F\u0345","\u038F\u0399","\u038F\u03B9","\u038F\u1FBE","\u03CE\u0345","\u03CE\u0399","\u03CE\u03B9","\u03CE\u1FBE",},
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{ARMENIAN SMALL LETTER ECH}\N{ARMENIAN SMALL LETTER YIWN}
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u0565\u0582","\u0535\u0552","\u0535\u0582","\u0565\u0552","\u0565\u0582",},
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{ARMENIAN SMALL LETTER MEN}\N{ARMENIAN SMALL LETTER ECH}
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u0574\u0565","\u0544\u0535","\u0544\u0565","\u0574\u0535","\u0574\u0565",},
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{ARMENIAN SMALL LETTER MEN}\N{ARMENIAN SMALL LETTER INI}
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u0574\u056B","\u0544\u053B","\u0544\u056B","\u0574\u053B","\u0574\u056B",},
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{ARMENIAN SMALL LETTER MEN}\N{ARMENIAN SMALL LETTER XEH}
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u0574\u056D","\u0544\u053D","\u0544\u056D","\u0574\u053D","\u0574\u056D",},
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{ARMENIAN SMALL LETTER MEN}\N{ARMENIAN SMALL LETTER NOW}
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u0574\u0576","\u0544\u0546","\u0544\u0576","\u0574\u0546","\u0574\u0576",},
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{ARMENIAN SMALL LETTER VEW}\N{ARMENIAN SMALL LETTER NOW}
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u057E\u0576","\u054E\u0546","\u054E\u0576","\u057E\u0546","\u057E\u0576",},
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ALPHA WITH PSILI}\N{GREEK SMALL LETTER IOTA}
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F00\u03B9","\u1F00\u0345","\u1F00\u0399","\u1F00\u03B9","\u1F00\u1FBE","\u1F08\u0345","\u1F08\u0399","\u1F08\u03B9","\u1F08\u1FBE",},
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ALPHA WITH DASIA}\N{GREEK SMALL LETTER IOTA}
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F01\u03B9","\u1F01\u0345","\u1F01\u0399","\u1F01\u03B9","\u1F01\u1FBE","\u1F09\u0345","\u1F09\u0399","\u1F09\u03B9","\u1F09\u1FBE",},
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA}\N{GREEK SMALL LETTER IOTA}
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F02\u03B9","\u1F02\u0345","\u1F02\u0399","\u1F02\u03B9","\u1F02\u1FBE","\u1F0A\u0345","\u1F0A\u0399","\u1F0A\u03B9","\u1F0A\u1FBE",},
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA}\N{GREEK SMALL LETTER IOTA}
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F03\u03B9","\u1F03\u0345","\u1F03\u0399","\u1F03\u03B9","\u1F03\u1FBE","\u1F0B\u0345","\u1F0B\u0399","\u1F0B\u03B9","\u1F0B\u1FBE",},
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA}\N{GREEK SMALL LETTER IOTA}
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F04\u03B9","\u1F04\u0345","\u1F04\u0399","\u1F04\u03B9","\u1F04\u1FBE","\u1F0C\u0345","\u1F0C\u0399","\u1F0C\u03B9","\u1F0C\u1FBE",},
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA}\N{GREEK SMALL LETTER IOTA}
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F05\u03B9","\u1F05\u0345","\u1F05\u0399","\u1F05\u03B9","\u1F05\u1FBE","\u1F0D\u0345","\u1F0D\u0399","\u1F0D\u03B9","\u1F0D\u1FBE",},
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F06\u03B9","\u1F06\u0345","\u1F06\u0399","\u1F06\u03B9","\u1F06\u1FBE","\u1F0E\u0345","\u1F0E\u0399","\u1F0E\u03B9","\u1F0E\u1FBE",},
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F07\u03B9","\u1F07\u0345","\u1F07\u0399","\u1F07\u03B9","\u1F07\u1FBE","\u1F0F\u0345","\u1F0F\u0399","\u1F0F\u03B9","\u1F0F\u1FBE",},
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ETA WITH PSILI}\N{GREEK SMALL LETTER IOTA}
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F20\u03B9","\u1F20\u0345","\u1F20\u0399","\u1F20\u03B9","\u1F20\u1FBE","\u1F28\u0345","\u1F28\u0399","\u1F28\u03B9","\u1F28\u1FBE",},
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ETA WITH DASIA}\N{GREEK SMALL LETTER IOTA}
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F21\u03B9","\u1F21\u0345","\u1F21\u0399","\u1F21\u03B9","\u1F21\u1FBE","\u1F29\u0345","\u1F29\u0399","\u1F29\u03B9","\u1F29\u1FBE",},
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ETA WITH PSILI AND VARIA}\N{GREEK SMALL LETTER IOTA}
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F22\u03B9","\u1F22\u0345","\u1F22\u0399","\u1F22\u03B9","\u1F22\u1FBE","\u1F2A\u0345","\u1F2A\u0399","\u1F2A\u03B9","\u1F2A\u1FBE",},
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ETA WITH DASIA AND VARIA}\N{GREEK SMALL LETTER IOTA}
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F23\u03B9","\u1F23\u0345","\u1F23\u0399","\u1F23\u03B9","\u1F23\u1FBE","\u1F2B\u0345","\u1F2B\u0399","\u1F2B\u03B9","\u1F2B\u1FBE",},
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ETA WITH PSILI AND OXIA}\N{GREEK SMALL LETTER IOTA}
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F24\u03B9","\u1F24\u0345","\u1F24\u0399","\u1F24\u03B9","\u1F24\u1FBE","\u1F2C\u0345","\u1F2C\u0399","\u1F2C\u03B9","\u1F2C\u1FBE",},
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ETA WITH DASIA AND OXIA}\N{GREEK SMALL LETTER IOTA}
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F25\u03B9","\u1F25\u0345","\u1F25\u0399","\u1F25\u03B9","\u1F25\u1FBE","\u1F2D\u0345","\u1F2D\u0399","\u1F2D\u03B9","\u1F2D\u1FBE",},
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F26\u03B9","\u1F26\u0345","\u1F26\u0399","\u1F26\u03B9","\u1F26\u1FBE","\u1F2E\u0345","\u1F2E\u0399","\u1F2E\u03B9","\u1F2E\u1FBE",},
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F27\u03B9","\u1F27\u0345","\u1F27\u0399","\u1F27\u03B9","\u1F27\u1FBE","\u1F2F\u0345","\u1F2F\u0399","\u1F2F\u03B9","\u1F2F\u1FBE",},
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER OMEGA WITH PSILI}\N{GREEK SMALL LETTER IOTA}
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F60\u03B9","\u1F60\u0345","\u1F60\u0399","\u1F60\u03B9","\u1F60\u1FBE","\u1F68\u0345","\u1F68\u0399","\u1F68\u03B9","\u1F68\u1FBE",},
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER OMEGA WITH DASIA}\N{GREEK SMALL LETTER IOTA}
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F61\u03B9","\u1F61\u0345","\u1F61\u0399","\u1F61\u03B9","\u1F61\u1FBE","\u1F69\u0345","\u1F69\u0399","\u1F69\u03B9","\u1F69\u1FBE",},
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA}\N{GREEK SMALL LETTER IOTA}
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F62\u03B9","\u1F62\u0345","\u1F62\u0399","\u1F62\u03B9","\u1F62\u1FBE","\u1F6A\u0345","\u1F6A\u0399","\u1F6A\u03B9","\u1F6A\u1FBE",},
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA}\N{GREEK SMALL LETTER IOTA}
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F63\u03B9","\u1F63\u0345","\u1F63\u0399","\u1F63\u03B9","\u1F63\u1FBE","\u1F6B\u0345","\u1F6B\u0399","\u1F6B\u03B9","\u1F6B\u1FBE",},
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA}\N{GREEK SMALL LETTER IOTA}
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F64\u03B9","\u1F64\u0345","\u1F64\u0399","\u1F64\u03B9","\u1F64\u1FBE","\u1F6C\u0345","\u1F6C\u0399","\u1F6C\u03B9","\u1F6C\u1FBE",},
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA}\N{GREEK SMALL LETTER IOTA}
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F65\u03B9","\u1F65\u0345","\u1F65\u0399","\u1F65\u03B9","\u1F65\u1FBE","\u1F6D\u0345","\u1F6D\u0399","\u1F6D\u03B9","\u1F6D\u1FBE",},
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F66\u03B9","\u1F66\u0345","\u1F66\u0399","\u1F66\u03B9","\u1F66\u1FBE","\u1F6E\u0345","\u1F6E\u0399","\u1F6E\u03B9","\u1F6E\u1FBE",},
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI}\N{GREEK SMALL LETTER IOTA}
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F67\u03B9","\u1F67\u0345","\u1F67\u0399","\u1F67\u03B9","\u1F67\u1FBE","\u1F6F\u0345","\u1F6F\u0399","\u1F6F\u03B9","\u1F6F\u1FBE",},
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ALPHA WITH VARIA}\N{GREEK SMALL LETTER IOTA}
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F70\u03B9","\u1F70\u0345","\u1F70\u0399","\u1F70\u03B9","\u1F70\u1FBE","\u1FBA\u0345","\u1FBA\u0399","\u1FBA\u03B9","\u1FBA\u1FBE",},
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER ETA WITH VARIA}\N{GREEK SMALL LETTER IOTA}
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F74\u03B9","\u1F74\u0345","\u1F74\u0399","\u1F74\u03B9","\u1F74\u1FBE","\u1FCA\u0345","\u1FCA\u0399","\u1FCA\u03B9","\u1FCA\u1FBE",},
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // \N{GREEK SMALL LETTER OMEGA WITH VARIA}\N{GREEK SMALL LETTER IOTA}
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        {"\u1F7C\u03B9","\u1F7C\u0345","\u1F7C\u0399","\u1F7C\u03B9","\u1F7C\u1FBE","\u1FFA\u0345","\u1FFA\u0399","\u1FFA\u03B9","\u1FFA\u1FBE",},
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    };
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // this initializes the data used to generate the case-equivalents
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static {
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Gather up the exceptions in a form we can use
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (!GENERATE) {
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (int i = 0; i < exceptionList.length; ++i) {
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                String[] exception = exceptionList[i];
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Set s = new HashSet();
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // there has to be some method to do the following, but I can't find it in the collections
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                for (int j = 0; j < exception.length; ++j) {
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    s.add(exception[j]);
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                fromCaseFold.put(exception[0], s);
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // walk through all the characters, and at every case fold result,
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // put a set of all the characters that map to that result
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean defaultmapping = true; // false for turkish
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (int i = 0; i <= 0x10FFFF; ++i) {
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int cat = UCharacter.getType(i);
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (cat == Character.UNASSIGNED || cat == Character.PRIVATE_USE) continue;
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String cp = UTF16.valueOf(i);
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String mapped = UCharacter.foldCase(cp, defaultmapping);
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (mapped.equals(cp)) continue;
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (maxLength < mapped.length()) maxLength = mapped.length();
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // at this point, have different case folding
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Set s = (Set) fromCaseFold.get(mapped);
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (s == null) {
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                s = new HashSet();
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                s.add(mapped); // add the case fold result itself
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                fromCaseFold.put(mapped, s);
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            s.add(cp);
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            toCaseFold.put(cp, mapped);
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            toCaseFold.put(mapped, mapped); // add mapping to self
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Emit the final data
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (DUMP) {
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            System.out.println("maxLength = " + maxLength);
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            System.out.println("\nfromCaseFold:");
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Iterator it = fromCaseFold.keySet().iterator();
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while (it.hasNext()) {
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Object key = it.next();
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                System.out.print(" " + toHex2.transliterate((String)key) + ": ");
2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Set s = (Set) fromCaseFold.get(key);
2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Iterator it2 = s.iterator();
2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                boolean first = true;
2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                while (it2.hasNext()) {
2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (first) {
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        first = false;
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else {
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        System.out.print(", ");
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    System.out.print(toHex2.transliterate((String)it2.next()));
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                System.out.println("");
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            System.out.println("\ntoCaseFold:");
2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            it = toCaseFold.keySet().iterator();
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while (it.hasNext()) {
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                String key = (String) it.next();
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                String value = (String) toCaseFold.get(key);
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                System.out.println(" " + toHex2.transliterate(key) + ": " + toHex2.transliterate(value));
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Now convert all those sets into linear arrays
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // We can't do this in place in Java, so make a temporary target array
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Note: This could be transformed into a single array, with offsets into it.
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Might be best choice in C.
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Map fromCaseFold2 = new HashMap();
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Iterator it = fromCaseFold.keySet().iterator();
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while (it.hasNext()) {
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Object key = it.next();
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Set s = (Set) fromCaseFold.get(key);
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String[] temp = new String[s.size()];
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            s.toArray(temp);
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            fromCaseFold2.put(key, temp);
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fromCaseFold = fromCaseFold2;
2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // We have processed everything, so the iterator will now work
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // The following is normally OFF.
3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // It is here to generate (under the GENERATE flag) the static exception list.
3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // It must be at the very end of initialization, so that the iterator is functional.
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // (easiest to do it that way)
3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (GENERATE) {
3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // first get small set of items that have multiple characters
3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Set multichars = new TreeSet();
3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            it = fromCaseFold.keySet().iterator();
3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while (it.hasNext()) {
3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                String key = (String) it.next();
3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (UTF16.countCodePoint(key) < 2) continue;
3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                multichars.add(key);
3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // now we will go through each of them.
3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CaseIterator ci = new CaseIterator();
3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            it = multichars.iterator();
3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while (it.hasNext()) {
3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                String key = (String) it.next();
3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // here is a nasty complication. Take 'ffi' ligature. We
3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // can't just close it, since we would miss the combination
3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // that includes the 'fi' => "fi" ligature
3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // so first do a pass through, and add substring combinations
3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // we call this a 'partial closure'
3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Set partialClosure = new TreeSet();
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                partialClosure.add(key);
3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (UTF16.countCodePoint(key) > 2) {
3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    Iterator multiIt2 = multichars.iterator();
3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    while (multiIt2.hasNext()) {
3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        String otherKey = (String) multiIt2.next();
3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if (otherKey.length() >= key.length()) continue;
3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        int pos = -1;
3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        while (true) {
3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            // The following is not completely general
3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            // but works for the actual cased stuff,
3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            // and should work for future characters, since we won't have
3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            // more ligatures & other oddities.
3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            pos = key.indexOf(otherKey, pos+1);
3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            if (pos < 0) break;
3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            int endPos = pos + otherKey.length();
3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            // we know we have a proper substring,
3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            // so get the combinations
3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            String[] choices = (String[]) fromCaseFold.get(otherKey);
3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            for (int ii = 0; ii < choices.length; ++ii) {
3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                String patchwork = key.substring(0, pos)
3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    + choices[ii]
3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    + key.substring(endPos);
3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                partialClosure.add(patchwork);
3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            }
3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // now, for each thing in the partial closure, get its
3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // case closure and add it to the final result.
3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Set closure = new TreeSet(); // this will be the real closure
3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Iterator partialIt = partialClosure.iterator();
3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                while (partialIt.hasNext()) {
3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    String key2 = (String) partialIt.next();
3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ci.reset(key2);
3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    for (String temp = ci.next(); temp != null; temp = ci.next()) {
3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        closure.add(temp);
3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // form closure
3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /*String[] choices = (String[]) fromCaseFold.get(key2);
3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    for (int i = 0; i < choices.length; ++i) {
3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        ci.reset(choices[i]);
3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        String temp;
3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        while (null != (temp = ci.next())) {
3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            closure.add(temp);
3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    */
3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // print it out, so that it can be cut and pasted back into this document.
3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Iterator it2 = closure.iterator();
3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                System.out.println("\t// " + toName.transliterate(key));
3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                System.out.print("\t{\"" + toHex.transliterate(key) + "\",");
3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                while (it2.hasNext()) {
3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    String item = (String)it2.next();
3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    System.out.print("\"" + toHex.transliterate(item) + "\",");
3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                System.out.println("},");
3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // ============ PRIVATE CLASS DATA ============
3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // pieces that we will put together
4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // is not changed during iteration
4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int count = 0;
4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private String[][] variants;
4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // state information, changes during iteration
4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean done = false;
4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int[] counts;
4087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // internal buffer for efficiency
4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private StringBuffer nextBuffer = new StringBuffer();
4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // ========================
4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Reset to different source. Once reset, the iteration starts from the beginning.
4167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param source The string to get case variants for
4177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void reset(String source) {
4197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // allocate arrays to store pieces
4217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // using length might be slightly too long, but we don't care much
4227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        counts = new int[source.length()];
4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        variants = new String[source.length()][];
4257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // walk through the source, and break up into pieces
4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // each piece becomes an array of equivalent values
4287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // TODO: could optimized this later to coalesce all single string pieces
4297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String piece = null;
4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        count = 0;
4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (int i = 0; i < source.length(); i += piece.length()) {
4337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // find *longest* matching piece
4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String caseFold = null;
4367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (GENERATE) {
4387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // do exactly one CP
4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                piece = UTF16.valueOf(source, i);
4407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                caseFold = (String) toCaseFold.get(piece);
4417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
4427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int max = i + maxLength;
4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (max > source.length()) max = source.length();
4447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                for (int j = max; j > i; --j) {
4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    piece = source.substring(i, j);
4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    caseFold = (String) toCaseFold.get(piece);
4477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (caseFold != null) break;
4487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
4497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // if we fail, pick one code point
4527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (caseFold == null) {
4537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                piece = UTF16.valueOf(source, i);
4547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                variants[count++] = new String[] {piece}; // single item string
4557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
4567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                variants[count++] = (String[])fromCaseFold.get(caseFold);
4577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        reset();
4607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Restart the iteration from the beginning, but with same source
4647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void reset() {
4667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        done = false;
4677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (int i = 0; i < count; ++i) {
4687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            counts[i] = 0;
4697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Iterates through the case variants.
4747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return next case variant. Each variant will case-fold to the same value as the source will.
4757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * When the iteration is done, null is returned.
4767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public String next() {
4787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (done) return null;
4807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int i;
4817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // TODO Optimize so we keep the piece before and after the current position
4837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // so we don't have so much concatenation
4847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // get the result, a concatenation
4867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        nextBuffer.setLength(0);
4887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (i = 0; i < count; ++i) {
4897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            nextBuffer.append(variants[i][counts[i]]);
4907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // find the next right set of pieces to concatenate
4937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (i = count-1; i >= 0; --i) {
4957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            counts[i]++;
4967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (counts[i] < variants[i].length) break;
4977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            counts[i] = 0;
4987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // if we go too far, bail
5017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (i < 0) {
5037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            done = true;
5047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return nextBuffer.toString();
5077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Temporary test, just to see how the stuff works.
5127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static public void main(String[] args) {
5147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String[] testCases = {"fiss", "h\u03a3"};
5157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CaseIterator ci = new CaseIterator();
5167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (int i = 0; i < testCases.length; ++i) {
5187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String item = testCases[i];
5197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            System.out.println();
5207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            System.out.println("Testing: " + toName.transliterate(item));
5217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            System.out.println();
5227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ci.reset(item);
5237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int count = 0;
5247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (String temp = ci.next(); temp != null; temp = ci.next()) {
5257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                System.out.println(toName.transliterate(temp));
5267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                count++;
5277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            System.out.println("Total: " + count);
5297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // generate a list of all caseless characters -- characters whose
5327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // case closure is themselves.
5337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UnicodeSet caseless = new UnicodeSet();
5357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (int i = 0; i <= 0x10FFFF; ++i) {
5377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String cp = UTF16.valueOf(i);
5387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ci.reset(cp);
5397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int count = 0;
5407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String fold = null;
5417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (String temp = ci.next(); temp != null; temp = ci.next()) {
5427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                fold = temp;
5437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (++count > 1) break;
5447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (count==1 && fold.equals(cp)) {
5467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                caseless.add(i);
5477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        System.out.println("caseless = " + caseless.toPattern(true));
5517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UnicodeSet not_lc = new UnicodeSet("[:^lc:]");
5537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UnicodeSet a = new UnicodeSet();
5557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        a.set(not_lc);
5567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        a.removeAll(caseless);
5577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        System.out.println("[:^lc:] - caseless = " + a.toPattern(true));
5587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        a.set(caseless);
5607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        a.removeAll(not_lc);
5617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        System.out.println("caseless - [:^lc:] = " + a.toPattern(true));
5627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
564