12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */
2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others.
3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License
42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/*
52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *******************************************************************************
62ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Copyright (C) 1996-2014, International Business Machines Corporation and    *
72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * others. All Rights Reserved.                                                *
82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *******************************************************************************
92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */
102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.text;
112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.ArrayList;
132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.HashSet;
142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.Iterator;
152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.List;
162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.Set;
172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.Norm2AllModes;
192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.Normalizer2Impl;
202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.Utility;
212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UCharacter;
222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/**
242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * This class allows one to iterate through all the strings that are canonically equivalent to a given
252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * string. For example, here are some sample results:
262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * Results for: {A WITH RING ABOVE}{d}{DOT ABOVE}{CEDILLA}
272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * <pre>
282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 1: {A}{RING ABOVE}{d}{DOT ABOVE}{CEDILLA}
292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 2: {A}{RING ABOVE}{d}{CEDILLA}{DOT ABOVE}
302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 3: {A}{RING ABOVE}{d WITH DOT ABOVE}{CEDILLA}
312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 4: {A}{RING ABOVE}{d WITH CEDILLA}{DOT ABOVE}
322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 5: {A WITH RING ABOVE}{d}{DOT ABOVE}{CEDILLA}
332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 6: {A WITH RING ABOVE}{d}{CEDILLA}{DOT ABOVE}
342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 7: {A WITH RING ABOVE}{d WITH DOT ABOVE}{CEDILLA}
352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 8: {A WITH RING ABOVE}{d WITH CEDILLA}{DOT ABOVE}
362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller 9: {ANGSTROM SIGN}{d}{DOT ABOVE}{CEDILLA}
372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller10: {ANGSTROM SIGN}{d}{CEDILLA}{DOT ABOVE}
382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller11: {ANGSTROM SIGN}{d WITH DOT ABOVE}{CEDILLA}
392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller12: {ANGSTROM SIGN}{d WITH CEDILLA}{DOT ABOVE}
402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *</pre>
412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *<br>Note: the code is intended for use with small strings, and is not suitable for larger ones,
422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * since it has not been optimized for that situation.
432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller * @author M. Davis
44836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller * @hide Only a subset of ICU is exposed in Android
452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */
462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpublic final class CanonicalIterator {
482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Construct a CanonicalIterator object
502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param source string to get results for
512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public CanonicalIterator(String source) {
532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Norm2AllModes allModes = Norm2AllModes.getNFCInstance();
542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        nfd = allModes.decomp;
552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        nfcImpl = allModes.impl.ensureCanonIterData();
562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        setSource(source);
572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Gets the NFD form of the current source we are iterating over.
612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return gets the source: NOTE: it is the NFD form of the source originally passed in
622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public String getSource() {
642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller      return source;
652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Resets the iterator so that one can start again from the beginning.
692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public void reset() {
712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        done = false;
722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = 0; i < current.length; ++i) {
732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            current[i] = 0;
742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Get the next canonically equivalent string.
792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <br><b>Warning: The strings are not guaranteed to be in any particular order.</b>
802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the next string that is canonically equivalent. The value null is returned when
812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the iteration is done.
822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public String next() {
842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (done) return null;
852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // construct return value
872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer.setLength(0); // delete old contents
892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = 0; i < pieces.length; ++i) {
902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            buffer.append(pieces[i][current[i]]);
912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        String result = buffer.toString();
932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // find next value for next time
952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = current.length - 1; ; --i) {
972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (i < 0) {
982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                done = true;
992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            current[i]++;
1022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (current[i] < pieces[i].length) break; // got sequence
1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            current[i] = 0;
1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return result;
1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Set a new source for this iterator. Allows object reuse.
1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param newSource the source string to iterate against. This allows the same iterator to be used
1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * while changing the source string, saving object creation.
1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public void setSource(String newSource) {
1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        source = nfd.normalize(newSource);
1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        done = false;
1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // catch degenerate case
1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (newSource.length() == 0) {
1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            pieces = new String[1][];
1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            current = new int[1];
1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            pieces[0] = new String[]{""};
1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return;
1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // find the segments
1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        List<String> segmentList = new ArrayList<String>();
1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int cp;
1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int start = 0;
1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // i should be the end of the first code point
1312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // break up the string into segements
1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int i = UTF16.findOffsetFromCodePoint(source, 1);
1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (; i < source.length(); i += Character.charCount(cp)) {
1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            cp = source.codePointAt(i);
1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (nfcImpl.isCanonSegmentStarter(cp)) {
1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                segmentList.add(source.substring(start, i)); // add up to i
1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                start = i;
1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        segmentList.add(source.substring(start, i)); // add last one
1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // allocate the arrays, and find the strings that are CE to each segment
1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        pieces = new String[segmentList.size()][];
1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        current = new int[segmentList.size()];
1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (i = 0; i < pieces.length; ++i) {
1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (PROGRESS) System.out.println("SEGMENT");
1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            pieces[i] = getEquivalents(segmentList.get(i));
1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Simple implementation of permutation.
1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <br><b>Warning: The strings are not guaranteed to be in any particular order.</b>
1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param source the string to find permutations for
1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param skipZeros set to true to skip characters with canonical combining class zero
1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param output the set to add the results to
1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @deprecated This API is ICU internal only.
160836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller     * @hide draft / provisional / internal are hidden on Android
1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    @Deprecated
1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static void permute(String source, boolean skipZeros, Set<String> output) {
1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // TODO: optimize
1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //if (PROGRESS) System.out.println("Permute: " + source);
1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // optimization:
1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // if zero or one character, just return a set with it
1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // we check for length < 2 to keep from counting code points all the time
1702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (source.length() <= 2 && UTF16.countCodePoint(source) <= 1) {
1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            output.add(source);
1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return;
1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // otherwise iterate through the string, and recursively permute all the other characters
1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Set<String> subpermute = new HashSet<String>();
1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int cp;
1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) {
1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            cp = UTF16.charAt(source, i);
1802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // optimization:
1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // if the character is canonical combining class zero,
1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // don't permute it
1842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (skipZeros && i != 0 && UCharacter.getCombiningClass(cp) == 0) {
1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //System.out.println("Skipping " + Utility.hex(UTF16.valueOf(source, i)));
1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                continue;
1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // see what the permutations of the characters before and after this one are
1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            subpermute.clear();
1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            permute(source.substring(0,i)
1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                + source.substring(i + UTF16.getCharCount(cp)), skipZeros, subpermute);
1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // prefix this character to all of them
1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            String chStr = UTF16.valueOf(source, i);
1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for (String s : subpermute) {
1972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                String piece = chStr + s;
1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //if (PROGRESS) System.out.println("  Piece: " + piece);
1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                output.add(piece);
2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // FOR TESTING
2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /*
2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *@return the set of "safe starts", characters that are class zero AND are never non-initial in a decomposition.
2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static UnicodeSet getSafeStart() {
2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (UnicodeSet) SAFE_START.clone();
2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
2122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    */
2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /*
2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *@return the set of characters whose decompositions start with the given character
2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static UnicodeSet getStarts(int cp) {
2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        UnicodeSet result = AT_START.get(cp);
2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (result == null) result = EMPTY;
2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (UnicodeSet) result.clone();
2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    */
2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // ===================== PRIVATES ==============================
2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // debug
2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static boolean PROGRESS = false; // debug progress
2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //private static Transliterator NAME = PROGRESS ? Transliterator.getInstance("name") : null;
2282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static boolean SKIP_ZEROS = true;
2292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // fields
2312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private final Normalizer2 nfd;
2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private final Normalizer2Impl nfcImpl;
2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private String source;
2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private boolean done;
2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private String[][] pieces;
2362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int[] current;
2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Note: C will need two more fields, since arrays there don't have lengths
2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // int pieces_length;
2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // int[] pieces_lengths;
2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // transient fields
2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private transient StringBuilder buffer = new StringBuilder();
2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
2462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private String[] getEquivalents(String segment) {
2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Set<String> result = new HashSet<String>();
2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Set<String> basic = getEquivalents2(segment);
2492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Set<String> permutations = new HashSet<String>();
2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // now get all the permutations
2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // add only the ones that are canonically equivalent
2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // TODO: optimize by not permuting any class zero.
2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Iterator<String> it = basic.iterator();
2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while (it.hasNext()) {
2562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            String item = it.next();
2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            permutations.clear();
2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            permute(item, SKIP_ZEROS, permutations);
2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            Iterator<String> it2 = permutations.iterator();
2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while (it2.hasNext()) {
2612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                String possible = it2.next();
2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/*
2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                String attempt = Normalizer.normalize(possible, Normalizer.DECOMP, 0);
2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (attempt.equals(segment)) {
2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*/
2672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (Normalizer.compare(possible, segment,0)==0) {
2682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (PROGRESS) System.out.println("Adding Permutation: " + Utility.hex(possible));
2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    result.add(possible);
2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (PROGRESS) System.out.println("-Skipping Permutation: " + Utility.hex(possible));
2742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
2752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // convert into a String[] to clean up storage
2792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        String[] finalResult = new String[result.size()];
2802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        result.toArray(finalResult);
2812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return finalResult;
2822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
2832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private Set<String> getEquivalents2(String segment) {
2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Set<String> result = new HashSet<String>();
2882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (PROGRESS) System.out.println("Adding: " + Utility.hex(segment));
2902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        result.add(segment);
2922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuffer workingBuffer = new StringBuffer();
2932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        UnicodeSet starts = new UnicodeSet();
2942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // cycle through all the characters
2962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int cp;
2972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = 0; i < segment.length(); i += Character.charCount(cp)) {
2982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // see if any character is at the start of some decomposition
3002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            cp = segment.codePointAt(i);
3012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (!nfcImpl.getCanonStartSet(cp, starts)) {
3022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller              continue;
3032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // if so, see which decompositions match
3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for(UnicodeSetIterator iter = new UnicodeSetIterator(starts); iter.next();) {
3062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int cp2 = iter.codepoint;
3072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                Set<String> remainder = extract(cp2, segment, i, workingBuffer);
3082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (remainder == null) {
3092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    continue;
3102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // there were some matches, so add all the possibilities to the set.
3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                String prefix= segment.substring(0,i);
3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                prefix += UTF16.valueOf(cp2);
3152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                for (String item : remainder) {
3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    result.add(prefix + item);
3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return result;
3212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /*
3222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Set result = new HashSet();
3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (PROGRESS) System.out.println("Adding: " + NAME.transliterate(segment));
3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        result.add(segment);
3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuffer workingBuffer = new StringBuffer();
3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // cycle through all the characters
3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int cp;
3292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = 0; i < segment.length(); i += UTF16.getCharCount(cp)) {
3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // see if any character is at the start of some decomposition
3322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            cp = UTF16.charAt(segment, i);
3332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            NormalizerImpl.getCanonStartSet(c,fillSet)
3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            UnicodeSet starts = AT_START.get(cp);
3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (starts == null) continue;
3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            UnicodeSetIterator usi = new UnicodeSetIterator(starts);
3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // if so, see which decompositions match
3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while (usi.next()) {
3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int cp2 = usi.codepoint;
3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // we know that there are no strings in it
3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // so we don't have to check CharacterIterator.IS_STRING
3422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                Set remainder = extract(cp2, segment, i, workingBuffer);
3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (remainder == null) continue;
3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // there were some matches, so add all the possibilities to the set.
3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                String prefix = segment.substring(0, i) + UTF16.valueOf(cp2);
3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                Iterator it = remainder.iterator();
3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                while (it.hasNext()) {
3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    String item = (String) it.next();
3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if (PROGRESS) System.out.println("Adding: " + NAME.transliterate(prefix + item));
3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    result.add(prefix + item);
3522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return result;
3562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        */
3572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
3582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
3602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * See if the decomposition of cp2 is at segment starting at segmentPos
3612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * (with canonical rearrangment!)
3622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If so, take the remainder, and return the equivalents
3632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
3642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private Set<String> extract(int comp, String segment, int segmentPos, StringBuffer buf) {
3652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (PROGRESS) System.out.println(" extract: " + Utility.hex(UTF16.valueOf(comp))
3662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            + ", " + Utility.hex(segment.substring(segmentPos)));
3672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        String decomp = nfcImpl.getDecomposition(comp);
3692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (decomp == null) {
3702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            decomp = UTF16.valueOf(comp);
3712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // See if it matches the start of segment (at segmentPos)
3742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean ok = false;
3752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int cp;
3762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int decompPos = 0;
3772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int decompCp = UTF16.charAt(decomp,0);
3782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        decompPos += UTF16.getCharCount(decompCp); // adjust position to skip first char
3792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        //int decompClass = getClass(decompCp);
3802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buf.setLength(0); // initialize working buffer, shared among callees
3812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int i = segmentPos; i < segment.length(); i += UTF16.getCharCount(cp)) {
3832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            cp = UTF16.charAt(segment, i);
3842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (cp == decompCp) { // if equal, eat another cp from decomp
3852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (PROGRESS) System.out.println("  matches: " + Utility.hex(UTF16.valueOf(cp)));
3862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (decompPos == decomp.length()) { // done, have all decomp characters!
3872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buf.append(segment.substring(i + UTF16.getCharCount(cp))); // add remaining segment chars
3882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ok = true;
3892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
3902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                decompCp = UTF16.charAt(decomp, decompPos);
3922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                decompPos += UTF16.getCharCount(decompCp);
3932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //decompClass = getClass(decompCp);
3942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
3952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (PROGRESS) System.out.println("  buffer: " + Utility.hex(UTF16.valueOf(cp)));
3962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // brute force approach
3972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                UTF16.append(buf, cp);
3982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                /* TODO: optimize
3992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // since we know that the classes are monotonically increasing, after zero
4002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // e.g. 0 5 7 9 0 3
4012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // we can do an optimization
4022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // there are only a few cases that work: zero, less, same, greater
4032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // if both classes are the same, we fail
4042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // if the decomp class < the segment class, we fail
4052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                segClass = getClass(cp);
4072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (decompClass <= segClass) return null;
4082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                */
4092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (!ok) return null; // we failed, characters left over
4122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (PROGRESS) System.out.println("Matches");
4132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (buf.length() == 0) return SET_WITH_NULL_STRING; // succeed, but no remainder
4142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        String remainder = buf.toString();
4152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // brute force approach
4172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // to check to make sure result is canonically equivalent
4182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /*
4192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        String trial = Normalizer.normalize(UTF16.valueOf(comp) + remainder, Normalizer.DECOMP, 0);
4202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (!segment.regionMatches(segmentPos, trial, 0, segment.length() - segmentPos)) return null;
4212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        */
4222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (0!=Normalizer.compare(UTF16.valueOf(comp) + remainder, segment.substring(segmentPos), 0)) return null;
4242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // get the remaining combinations
4262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return getEquivalents2(remainder);
4272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /*
4302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // TODO: fix once we have a codepoint interface to get the canonical combining class
4312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // TODO: Need public access to canonical combining class in UCharacter!
4322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static int getClass(int cp) {
4332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return Normalizer.getClass((char)cp);
4342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    */
4362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller   // ================= BUILDER =========================
4382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // TODO: Flatten this data so it doesn't have to be reconstructed each time!
4392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //private static final UnicodeSet EMPTY = new UnicodeSet(); // constant, don't change
4412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final Set<String> SET_WITH_NULL_STRING = new HashSet<String>(); // constant, don't change
4422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static {
4432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        SET_WITH_NULL_STRING.add("");
4442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller  //  private static UnicodeSet SAFE_START = new UnicodeSet();
4472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller  //  private static CharMap AT_START = new CharMap();
4482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // TODO: WARNING, NORMALIZER doesn't have supplementaries yet !!;
4502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Change FFFF to 10FFFF in C, and in Java when normalizer is upgraded.
4512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller  //  private static int LAST_UNICODE = 0x10FFFF;
4522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /*
4532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static {
4542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buildData();
4552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    */
4572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /*
4582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static void buildData() {
4592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (PROGRESS) System.out.println("Getting Safe Start");
4612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int cp = 0; cp <= LAST_UNICODE; ++cp) {
4622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (PROGRESS & (cp & 0x7FF) == 0) System.out.print('.');
4632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int cc = UCharacter.getCombiningClass(cp);
4642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (cc == 0) SAFE_START.add(cp);
4652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // will fix to be really safe below
4662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (PROGRESS) System.out.println();
4682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (PROGRESS) System.out.println("Getting Containment");
4702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (int cp = 0; cp <= LAST_UNICODE; ++cp) {
4712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (PROGRESS & (cp & 0x7FF) == 0) System.out.print('.');
4722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (Normalizer.isNormalized(cp, Normalizer.NFD)) continue;
4742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //String istr = UTF16.valueOf(cp);
4762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            String decomp = Normalizer.normalize(cp, Normalizer.NFD);
4772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            //if (decomp.equals(istr)) continue;
4782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // add each character in the decomposition to canBeIn
4802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int component;
4822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for (int i = 0; i < decomp.length(); i += UTF16.getCharCount(component)) {
4832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                component = UTF16.charAt(decomp, i);
4842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (i == 0) {
4852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    AT_START.add(component, cp);
4862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if (UCharacter.getCombiningClass(component) == 0) {
4872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    SAFE_START.remove(component);
4882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
4892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if (PROGRESS) System.out.println();
4922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // the following is just for a map from characters to a set of characters
4942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static class CharMap {
4962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Map storage = new HashMap();
4972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        MutableInt probe = new MutableInt();
4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean converted = false;
4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public void add(int cp, int whatItIsIn) {
5012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            UnicodeSet result = (UnicodeSet) storage.get(probe.set(cp));
5022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (result == null) {
5032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                result = new UnicodeSet();
5042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                storage.put(probe, result);
5052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            result.add(whatItIsIn);
5072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public UnicodeSet get(int cp) {
5102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return (UnicodeSet) storage.get(probe.set(cp));
5112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static class MutableInt {
5152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public int contents;
5162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public int hashCode() { return contents; }
5172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public boolean equals(Object other) {
5182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return ((MutableInt)other).contents == contents;
5192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // allows chaining
5212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public MutableInt set(int contents) {
5222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            this.contents = contents;
5232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return this;
5242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    */
5272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller}
529