Normalizer2Impl.java revision 1537b2f39245c07b00aa78c3600f7aebcb172490
12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */
22ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/*
32ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *******************************************************************************
42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *   Copyright (C) 2009-2015, International Business Machines
52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *   Corporation and others.  All Rights Reserved.
62ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *******************************************************************************
72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */
82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.impl;
102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.io.IOException;
122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.nio.ByteBuffer;
132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.ArrayList;
142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.Iterator;
152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UTF16;
172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UnicodeSet;
182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.ICUUncheckedIOException;
192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.VersionInfo;
202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
211537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller/**
221537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller * @hide Only a subset of ICU is exposed in Android
231537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller * @hide All android.icu classes are currently hidden
24836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller */
252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpublic final class Normalizer2Impl {
262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final class Hangul {
272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* Korean Hangul and Jamo constants */
282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int JAMO_L_BASE=0x1100;     /* "lead" jamo */
292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int JAMO_L_END=0x1112;
302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int JAMO_V_BASE=0x1161;     /* "vowel" jamo */
312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int JAMO_V_END=0x1175;
322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int JAMO_T_BASE=0x11a7;     /* "trail" jamo */
332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int JAMO_T_END=0x11c2;
342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int HANGUL_BASE=0xac00;
362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int HANGUL_END=0xd7a3;
372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int JAMO_L_COUNT=19;
392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int JAMO_V_COUNT=21;
402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int JAMO_T_COUNT=28;
412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int JAMO_L_LIMIT=JAMO_L_BASE+JAMO_L_COUNT;
432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int JAMO_V_LIMIT=JAMO_V_BASE+JAMO_V_COUNT;
442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int JAMO_VT_COUNT=JAMO_V_COUNT*JAMO_T_COUNT;
462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT;
482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT;
492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static boolean isHangul(int c) {
512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return HANGUL_BASE<=c && c<HANGUL_LIMIT;
522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static boolean isHangulWithoutJamoT(char c) {
542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            c-=HANGUL_BASE;
552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return c<HANGUL_COUNT && c%JAMO_T_COUNT==0;
562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static boolean isJamoL(int c) {
582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return JAMO_L_BASE<=c && c<JAMO_L_LIMIT;
592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static boolean isJamoV(int c) {
612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return JAMO_V_BASE<=c && c<JAMO_V_LIMIT;
622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /**
652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Decomposes c, which must be a Hangul syllable, into buffer
662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * and returns the length of the decomposition (2 or 3).
672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static int decompose(int c, Appendable buffer) {
692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            try {
702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c-=HANGUL_BASE;
712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int c2=c%JAMO_T_COUNT;
722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c/=JAMO_T_COUNT;
732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.append((char)(JAMO_L_BASE+c/JAMO_V_COUNT));
742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.append((char)(JAMO_V_BASE+c%JAMO_V_COUNT));
752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(c2==0) {
762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return 2;
772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append((char)(JAMO_T_BASE+c2));
792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return 3;
802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } catch(IOException e) {
822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Will not occur because we do not write to I/O.
832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new ICUUncheckedIOException(e);
842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /**
882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Decomposes c, which must be a Hangul syllable, into buffer.
892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * This is the raw, not recursive, decomposition. Its length is always 2.
902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static void getRawDecomposition(int c, Appendable buffer) {
922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            try {
932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int orig=c;
942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c-=HANGUL_BASE;
952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int c2=c%JAMO_T_COUNT;
962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(c2==0) {
972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    c/=JAMO_T_COUNT;
982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append((char)(JAMO_L_BASE+c/JAMO_V_COUNT));
992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append((char)(JAMO_V_BASE+c%JAMO_V_COUNT));
1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append((char)(orig-c2));  // LV syllable
1022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append((char)(JAMO_T_BASE+c2));
1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } catch(IOException e) {
1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Will not occur because we do not write to I/O.
1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new ICUUncheckedIOException(e);
1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Writable buffer that takes care of canonical ordering.
1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Its Appendable methods behave like the C++ implementation's
1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * appendZeroCC() methods.
1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <p>
1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If dest is a StringBuilder, then the buffer writes directly to it.
1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Otherwise, the buffer maintains a StringBuilder for intermediate text segments
1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * until no further changes are necessary and whole segments are appended.
1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * append() methods that take combining-class values always write to the StringBuilder.
1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Other append() methods flush and append to the Appendable.
1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final class ReorderingBuffer implements Appendable {
1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public ReorderingBuffer(Normalizer2Impl ni, Appendable dest, int destCapacity) {
1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            impl=ni;
1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            app=dest;
1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(app instanceof StringBuilder) {
1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                appIsStringBuilder=true;
1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                str=(StringBuilder)dest;
1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // In Java, the constructor subsumes public void init(int destCapacity) {
1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                str.ensureCapacity(destCapacity);
1312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                reorderStart=0;
1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(str.length()==0) {
1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    lastCC=0;
1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    setIterator();
1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    lastCC=previousCC();
1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Set reorderStart after the last code point with cc<=1 if there is one.
1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(lastCC>1) {
1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        while(previousCC()>1) {}
1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    reorderStart=codePointLimit;
1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                appIsStringBuilder=false;
1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                str=new StringBuilder();
1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                reorderStart=0;
1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                lastCC=0;
1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public boolean isEmpty() { return str.length()==0; }
1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public int length() { return str.length(); }
1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public int getLastCC() { return lastCC; }
1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public StringBuilder getStringBuilder() { return str; }
1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public boolean equals(CharSequence s, int start, int limit) {
1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return UTF16Plus.equal(str, 0, str.length(), s, start, limit);
1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // For Hangul composition, replacing the Leading consonant Jamo with the syllable.
1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public void setLastChar(char c) {
1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            str.setCharAt(str.length()-1, c);
1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public void append(int c, int cc) {
1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(lastCC<=cc || cc==0) {
1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                str.appendCodePoint(c);
1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                lastCC=cc;
1702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(cc<=1) {
1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    reorderStart=str.length();
1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                insert(c, cc);
1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // s must be in NFD, otherwise change the implementation.
1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public void append(CharSequence s, int start, int limit,
1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                           int leadCC, int trailCC) {
1802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(start==limit) {
1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return;
1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(lastCC<=leadCC || leadCC==0) {
1842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(trailCC<=1) {
1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    reorderStart=str.length()+(limit-start);
1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if(leadCC<=1) {
1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    reorderStart=str.length()+1;  // Ok if not a code point boundary.
1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                str.append(s, start, limit);
1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                lastCC=trailCC;
1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int c=Character.codePointAt(s, start);
1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                start+=Character.charCount(c);
1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                insert(c, leadCC);  // insert first code point
1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                while(start<limit) {
1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    c=Character.codePointAt(s, start);
1972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    start+=Character.charCount(c);
1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(start<limit) {
1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // s must be in NFD, otherwise we need to use getCC().
2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        leadCC=getCCFromYesOrMaybe(impl.getNorm16(c));
2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
2022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        leadCC=trailCC;
2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    append(c, leadCC);
2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // The following append() methods work like C++ appendZeroCC().
2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // They assume that the cc or trailCC of their input is 0.
2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Most of them implement Appendable interface methods.
2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // @Override when we switch to Java 6
2122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public ReorderingBuffer append(char c) {
2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            str.append(c);
2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            lastCC=0;
2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            reorderStart=str.length();
2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return this;
2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public void appendZeroCC(int c) {
2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            str.appendCodePoint(c);
2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            lastCC=0;
2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            reorderStart=str.length();
2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // @Override when we switch to Java 6
2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public ReorderingBuffer append(CharSequence s) {
2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(s.length()!=0) {
2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                str.append(s);
2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                lastCC=0;
2282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                reorderStart=str.length();
2292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return this;
2312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // @Override when we switch to Java 6
2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public ReorderingBuffer append(CharSequence s, int start, int limit) {
2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(start!=limit) {
2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                str.append(s, start, limit);
2362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                lastCC=0;
2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                reorderStart=str.length();
2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return this;
2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /**
2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Flushes from the intermediate StringBuilder to the Appendable,
2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * if they are different objects.
2442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Used after recomposition.
2452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Must be called at the end when writing to a non-StringBuilder Appendable.
2462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public void flush() {
2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(appIsStringBuilder) {
2492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                reorderStart=str.length();
2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                try {
2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    app.append(str);
2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    str.setLength(0);
2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    reorderStart=0;
2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } catch(IOException e) {
2562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    throw new ICUUncheckedIOException(e);  // Avoid declaring "throws IOException".
2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            lastCC=0;
2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /**
2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Flushes from the intermediate StringBuilder to the Appendable,
2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * if they are different objects.
2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Then appends the new text to the Appendable or StringBuilder.
2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Normally used after quick check loops find a non-empty sequence.
2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
2672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public ReorderingBuffer flushAndAppendZeroCC(CharSequence s, int start, int limit) {
2682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(appIsStringBuilder) {
2692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                str.append(s, start, limit);
2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                reorderStart=str.length();
2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                try {
2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    app.append(str).append(s, start, limit);
2742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    str.setLength(0);
2752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    reorderStart=0;
2762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } catch(IOException e) {
2772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    throw new ICUUncheckedIOException(e);  // Avoid declaring "throws IOException".
2782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
2792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            lastCC=0;
2812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return this;
2822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public void remove() {
2842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            str.setLength(0);
2852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            lastCC=0;
2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            reorderStart=0;
2872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public void removeSuffix(int suffixLength) {
2892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int oldLength=str.length();
2902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            str.delete(oldLength-suffixLength, oldLength);
2912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            lastCC=0;
2922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            reorderStart=str.length();
2932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /*
2962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * TODO: Revisit whether it makes sense to track reorderStart.
2972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * It is set to after the last known character with cc<=1,
2982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * which stops previousCC() before it reads that character and looks up its cc.
2992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * previousCC() is normally only called from insert().
3002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * In other words, reorderStart speeds up the insertion of a combining mark
3012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * into a multi-combining mark sequence where it does not belong at the end.
3022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * This might not be worth the trouble.
3032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * On the other hand, it's not a huge amount of trouble.
3042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         *
3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * We probably need it for UNORM_SIMPLE_APPEND.
3062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
3072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Inserts c somewhere before the last character.
3092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Requires 0<cc<lastCC which implies reorderStart<limit.
3102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private void insert(int c, int cc) {
3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for(setIterator(), skipPrevious(); previousCC()>cc;) {}
3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // insert c at codePointLimit, after the character with prevCC<=cc
3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(c<=0xffff) {
3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                str.insert(codePointLimit, (char)c);
3152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(cc<=1) {
3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    reorderStart=codePointLimit+1;
3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
3192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                str.insert(codePointLimit, Character.toChars(c));
3202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(cc<=1) {
3212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    reorderStart=codePointLimit+2;
3222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private final Normalizer2Impl impl;
3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private final Appendable app;
3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private final StringBuilder str;
3292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private final boolean appIsStringBuilder;
3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private int reorderStart;
3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private int lastCC;
3322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // private backward iterator
3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private void setIterator() { codePointStart=str.length(); }
3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private void skipPrevious() {  // Requires 0<codePointStart.
3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            codePointLimit=codePointStart;
3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            codePointStart=str.offsetByCodePoints(codePointStart, -1);
3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private int previousCC() {  // Returns 0 if there is no previous character.
3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            codePointLimit=codePointStart;
3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(reorderStart>=codePointStart) {
3422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return 0;
3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int c=str.codePointBefore(codePointStart);
3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            codePointStart-=Character.charCount(c);
3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(c<MIN_CCC_LCCC_CP) {
3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return 0;
3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return getCCFromYesOrMaybe(impl.getNorm16(c));
3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private int codePointStart, codePointLimit;
3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
3542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // TODO: Propose as public API on the UTF16 class.
3562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // TODO: Propose widening UTF16 methods that take char to take int.
3572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // TODO: Propose widening UTF16 methods that take String to take CharSequence.
3582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final class UTF16Plus {
3592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /**
3602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Assuming c is a surrogate code point (UTF16.isSurrogate(c)),
3612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * is it a lead surrogate?
3622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @param c code unit or code point
3632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @return true or false
3642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
3652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static boolean isSurrogateLead(int c) { return (c&0x400)==0; }
3662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /**
3672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Compares two CharSequence objects for binary equality.
3682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @param s1 first sequence
3692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @param s2 second sequence
3702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @return true if s1 contains the same text as s2
3712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
3722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static boolean equal(CharSequence s1,  CharSequence s2) {
3732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(s1==s2) {
3742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return true;
3752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int length=s1.length();
3772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(length!=s2.length()) {
3782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
3792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for(int i=0; i<length; ++i) {
3812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(s1.charAt(i)!=s2.charAt(i)) {
3822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return false;
3832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return true;
3862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /**
3882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Compares two CharSequence subsequences for binary equality.
3892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @param s1 first sequence
3902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @param start1 start offset in first sequence
3912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @param limit1 limit offset in first sequence
3922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @param s2 second sequence
3932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @param start2 start offset in second sequence
3942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @param limit2 limit offset in second sequence
3952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @return true if s1.subSequence(start1, limit1) contains the same text
3962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         *              as s2.subSequence(start2, limit2)
3972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
3982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static boolean equal(CharSequence s1, int start1, int limit1,
3992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    CharSequence s2, int start2, int limit2) {
4002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((limit1-start1)!=(limit2-start2)) {
4012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
4022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(s1==s2 && start1==start2) {
4042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return true;
4052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while(start1<limit1) {
4072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(s1.charAt(start1++)!=s2.charAt(start2++)) {
4082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return false;
4092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
4102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return true;
4122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public Normalizer2Impl() {}
4162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final class IsAcceptable implements ICUBinary.Authenticate {
4182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // @Override when we switch to Java 6
4192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public boolean isDataVersionAcceptable(byte version[]) {
4202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return version[0]==2;
4212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
4242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int DATA_FORMAT = 0x4e726d32;  // "Nrm2"
4252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public Normalizer2Impl load(ByteBuffer bytes) {
4272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
4282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            dataVersion=ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);
4292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int indexesLength=bytes.getInt()/4;  // inIndexes[IX_NORM_TRIE_OFFSET]/4
4302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(indexesLength<=IX_MIN_MAYBE_YES) {
4312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new ICUUncheckedIOException("Normalizer2 data: not enough indexes");
4322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int[] inIndexes=new int[indexesLength];
4342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            inIndexes[0]=indexesLength*4;
4352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for(int i=1; i<indexesLength; ++i) {
4362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                inIndexes[i]=bytes.getInt();
4372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP];
4402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP];
4412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            minYesNo=inIndexes[IX_MIN_YES_NO];
4432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            minYesNoMappingsOnly=inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
4442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            minNoNo=inIndexes[IX_MIN_NO_NO];
4452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            limitNoNo=inIndexes[IX_LIMIT_NO_NO];
4462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            minMaybeYes=inIndexes[IX_MIN_MAYBE_YES];
4472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Read the normTrie.
4492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int offset=inIndexes[IX_NORM_TRIE_OFFSET];
4502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
4512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            normTrie=Trie2_16.createFromSerialized(bytes);
4522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int trieLength=normTrie.getSerializedLength();
4532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(trieLength>(nextOffset-offset)) {
4542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new ICUUncheckedIOException("Normalizer2 data: not enough bytes for normTrie");
4552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ICUBinary.skipBytes(bytes, (nextOffset-offset)-trieLength);  // skip padding after trie bytes
4572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Read the composition and mapping data.
4592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            offset=nextOffset;
4602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
4612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int numChars=(nextOffset-offset)/2;
4622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(numChars!=0) {
4632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                maybeYesCompositions=ICUBinary.getString(bytes, numChars, 0);
4642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                extraData=maybeYesCompositions.substring(MIN_NORMAL_MAYBE_YES-minMaybeYes);
4652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // smallFCD: new in formatVersion 2
4682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            offset=nextOffset;
4692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            smallFCD=new byte[0x100];
4702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            bytes.get(smallFCD);
4712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Build tccc180[].
4732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // gennorm2 enforces lccc=0 for c<MIN_CCC_LCCC_CP=U+0300.
4742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            tccc180=new int[0x180];
4752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int bits=0;
4762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for(int c=0; c<0x180; bits>>=1) {
4772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if((c&0xff)==0) {
4782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    bits=smallFCD[c>>8];  // one byte per 0x100 code points
4792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
4802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if((bits&1)!=0) {
4812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    for(int i=0; i<0x20; ++i, ++c) {
4822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        tccc180[c]=getFCD16FromNormData(c)&0xff;
4832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
4842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
4852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    c+=0x20;
4862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
4872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return this;
4902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch(IOException e) {
4912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new ICUUncheckedIOException(e);
4922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public Normalizer2Impl load(String name) {
4952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return load(ICUBinary.getRequiredData(name));
4962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void enumLcccRange(int start, int end, int norm16, UnicodeSet set) {
4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(isAlgorithmicNoNo(norm16)) {
5002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Range of code points with same-norm16-value algorithmic decompositions.
5012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // They might have different non-zero FCD16 values.
5022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            do {
5032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int fcd16=getFCD16(start);
5042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(fcd16>0xff) { set.add(start); }
5052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } while(++start<=end);
5062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
5072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int fcd16=getFCD16(start);
5082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(fcd16>0xff) { set.add(start, end); }
5092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void enumNorm16PropertyStartsRange(int start, int end, int value, UnicodeSet set) {
5132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* add the start code point to the USet */
5142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        set.add(start);
5152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(start!=end && isAlgorithmicNoNo(value)) {
5162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Range of code points with same-norm16-value algorithmic decompositions.
5172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // They might have different non-zero FCD16 values.
5182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int prevFCD16=getFCD16(start);
5192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while(++start<=end) {
5202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int fcd16=getFCD16(start);
5212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(fcd16!=prevFCD16) {
5222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    set.add(start);
5232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    prevFCD16=fcd16;
5242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
5252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public void addLcccChars(UnicodeSet set) {
5302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* add the start code point of each same-value range of each trie */
5312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Iterator<Trie2.Range> trieIterator=normTrie.iterator();
5322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Trie2.Range range;
5332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
5342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            enumLcccRange(range.startCodePoint, range.endCodePoint, range.value, set);
5352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public void addPropertyStarts(UnicodeSet set) {
5392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* add the start code point of each same-value range of each trie */
5402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Iterator<Trie2.Range> trieIterator=normTrie.iterator();
5412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Trie2.Range range;
5422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
5432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            enumNorm16PropertyStartsRange(range.startCodePoint, range.endCodePoint, range.value, set);
5442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* add Hangul LV syllables and LV+1 because of skippables */
5472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(int c=Hangul.HANGUL_BASE; c<Hangul.HANGUL_LIMIT; c+=Hangul.JAMO_T_COUNT) {
5482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            set.add(c);
5492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            set.add(c+1);
5502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        set.add(Hangul.HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */
5522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public void addCanonIterPropertyStarts(UnicodeSet set) {
5552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* add the start code point of each same-value range of the canonical iterator data trie */
5562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ensureCanonIterData();
5572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // currently only used for the SEGMENT_STARTER property
5582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Iterator<Trie2.Range> trieIterator=canonIterData.iterator(segmentStarterMapper);
5592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Trie2.Range range;
5602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
5612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            /* add the start code point to the USet */
5622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            set.add(range.startCodePoint);
5632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final Trie2.ValueMapper segmentStarterMapper=new Trie2.ValueMapper() {
5662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public int map(int in) {
5672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return in&CANON_NOT_SEGMENT_STARTER;
5682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    };
5702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // low-level properties ------------------------------------------------ ***
5722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public Trie2_16 getNormTrie() { return normTrie; }
5742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Note: Normalizer2Impl.java r30983 (2011-nov-27)
5762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // still had getFCDTrie() which built and cached an FCD trie.
5772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // That provided faster access to FCD data than getFCD16FromNormData()
5782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // but required synchronization and consumed some 10kB of heap memory
5792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // in any process that uses FCD (e.g., via collation).
5802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // tccc180[] and smallFCD[] are intended to help with any loss of performance,
5812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // at least for Latin & CJK.
5822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
5842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Builds the canonical-iterator data for this instance.
5852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * This is required before any of {@link #isCanonSegmentStarter(int)} or
5862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * {@link #getCanonStartSet(int, UnicodeSet)} are called,
5872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * or else they crash.
5882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return this
5892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
5902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public synchronized Normalizer2Impl ensureCanonIterData() {
5912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(canonIterData==null) {
5922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            Trie2Writable newData=new Trie2Writable(0, 0);
5932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            canonStartSets=new ArrayList<UnicodeSet>();
5942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            Iterator<Trie2.Range> trieIterator=normTrie.iterator();
5952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            Trie2.Range range;
5962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
5972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                final int norm16=range.value;
5982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(norm16==0 || (minYesNo<=norm16 && norm16<minNoNo)) {
5992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Inert, or 2-way mapping (including Hangul syllable).
6002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // We do not write a canonStartSet for any yesNo character.
6012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Composites from 2-way mappings are added at runtime from the
6022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // starter's compositions list, and the other characters in
6032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // 2-way mappings get CANON_NOT_SEGMENT_STARTER set because they are
6042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // "maybe" characters.
6052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    continue;
6062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                for(int c=range.startCodePoint; c<=range.endCodePoint; ++c) {
6082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    final int oldValue=newData.get(c);
6092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int newValue=oldValue;
6102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(norm16>=minMaybeYes) {
6112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // not a segment starter if it occurs in a decomposition or has cc!=0
6122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        newValue|=CANON_NOT_SEGMENT_STARTER;
6132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if(norm16<MIN_NORMAL_MAYBE_YES) {
6142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            newValue|=CANON_HAS_COMPOSITIONS;
6152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
6162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else if(norm16<minYesNo) {
6172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        newValue|=CANON_HAS_COMPOSITIONS;
6182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
6192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // c has a one-way decomposition
6202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        int c2=c;
6212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        int norm16_2=norm16;
6222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        while(limitNoNo<=norm16_2 && norm16_2<minMaybeYes) {
6232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            c2=this.mapAlgorithmic(c2, norm16_2);
6242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            norm16_2=getNorm16(c2);
6252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
6262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if(minYesNo<=norm16_2 && norm16_2<limitNoNo) {
6272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // c decomposes, get everything from the variable-length extra data
6282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            int firstUnit=extraData.charAt(norm16_2);
6292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            int length=firstUnit&MAPPING_LENGTH_MASK;
6302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
6312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                if(c==c2 && (extraData.charAt(norm16_2-1)&0xff)!=0) {
6322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    newValue|=CANON_NOT_SEGMENT_STARTER;  // original c has cc!=0
6332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                }
6342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            }
6352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // Skip empty mappings (no characters in the decomposition).
6362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            if(length!=0) {
6372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                ++norm16_2;  // skip over the firstUnit
6382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // add c to first code point's start set
6392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                int limit=norm16_2+length;
6402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                c2=extraData.codePointAt(norm16_2);
6412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                addToStartSet(newData, c, c2);
6422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // Set CANON_NOT_SEGMENT_STARTER for each remaining code point of a
6432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // one-way mapping. A 2-way mapping is possible here after
6442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // intermediate algorithmic mapping.
6452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                if(norm16_2>=minNoNo) {
6462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    while((norm16_2+=Character.charCount(c2))<limit) {
6472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                        c2=extraData.codePointAt(norm16_2);
6482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                        int c2Value=newData.get(c2);
6492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                        if((c2Value&CANON_NOT_SEGMENT_STARTER)==0) {
6502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                            newData.set(c2, c2Value|CANON_NOT_SEGMENT_STARTER);
6512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                        }
6522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    }
6532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                }
6542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            }
6552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        } else {
6562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // c decomposed to c2 algorithmically; c has cc==0
6572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            addToStartSet(newData, c, c2);
6582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
6592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
6602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(newValue!=oldValue) {
6612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        newData.set(c, newValue);
6622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
6632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            canonIterData=newData.toTrie2_32();
6662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
6682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
6692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int getNorm16(int c) { return normTrie.get(c); }
6712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int getCompQuickCheck(int norm16) {
6732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(norm16<minNoNo || MIN_YES_YES_WITH_CC<=norm16) {
6742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 1;  // yes
6752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if(minMaybeYes<=norm16) {
6762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 2;  // maybe
6772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
6782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 0;  // no
6792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
6812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean isAlgorithmicNoNo(int norm16) { return limitNoNo<=norm16 && norm16<minMaybeYes; }
6822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean isCompNo(int norm16) { return minNoNo<=norm16 && norm16<minMaybeYes; }
6832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean isDecompYes(int norm16) { return norm16<minYesNo || minMaybeYes<=norm16; }
6842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int getCC(int norm16) {
6862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(norm16>=MIN_NORMAL_MAYBE_YES) {
6872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return norm16&0xff;
6882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(norm16<minNoNo || limitNoNo<=norm16) {
6902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 0;
6912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return getCCFromNoNo(norm16);
6932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
6942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static int getCCFromYesOrMaybe(int norm16) {
6952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return norm16>=MIN_NORMAL_MAYBE_YES ? norm16&0xff : 0;
6962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
6972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
6992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns the FCD data for code point c.
7002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c A Unicode code point.
7012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
7022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
7032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int getFCD16(int c) {
7042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(c<0) {
7052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 0;
7062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if(c<0x180) {
7072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return tccc180[c];
7082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if(c<=0xffff) {
7092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(!singleLeadMightHaveNonZeroFCD16(c)) { return 0; }
7102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return getFCD16FromNormData(c);
7122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /** Returns the FCD data for U+0000<=c<U+0180. */
7142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int getFCD16FromBelow180(int c) { return tccc180[c]; }
7152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /** Returns true if the single-or-lead code unit c might have non-zero FCD data. */
7162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean singleLeadMightHaveNonZeroFCD16(int lead) {
7172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // 0<=lead<=0xffff
7182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        byte bits=smallFCD[lead>>8];
7192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(bits==0) { return false; }
7202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return ((bits>>((lead>>5)&7))&1)!=0;
7212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /** Gets the FCD value from the regular normalization data. */
7242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int getFCD16FromNormData(int c) {
7252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Only loops for 1:1 algorithmic mappings.
7262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(;;) {
7272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int norm16=getNorm16(c);
7282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(norm16<=minYesNo) {
7292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // no decomposition or Hangul syllable, all zeros
7302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return 0;
7312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(norm16>=MIN_NORMAL_MAYBE_YES) {
7322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // combining mark
7332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                norm16&=0xff;
7342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return norm16|(norm16<<8);
7352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(norm16>=minMaybeYes) {
7362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return 0;
7372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(isDecompNoAlgorithmic(norm16)) {
7382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c=mapAlgorithmic(c, norm16);
7392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
7402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // c decomposes, get everything from the variable-length extra data
7412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int firstUnit=extraData.charAt(norm16);
7422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if((firstUnit&MAPPING_LENGTH_MASK)==0) {
7432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // A character that is deleted (maps to an empty string) must
7442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // get the worst-case lccc and tccc values because arbitrary
7452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // characters on both sides will become adjacent.
7462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return 0x1ff;
7472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
7482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int fcd16=firstUnit>>8;  // tccc
7492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
7502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        fcd16|=extraData.charAt(norm16-1)&0xff00;  // lccc
7512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
7522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return fcd16;
7532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
7542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
7592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Gets the decomposition for one code point.
7602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c code point
7612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return c's decomposition, if it has one; returns null if it does not have a decomposition
7622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
7632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public String getDecomposition(int c) {
7642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int decomp=-1;
7652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int norm16;
7662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(;;) {
7672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(c<minDecompNoCP || isDecompYes(norm16=getNorm16(c))) {
7682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // c does not decompose
7692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(isHangul(norm16)) {
7702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Hangul syllable: decompose algorithmically
7712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                StringBuilder buffer=new StringBuilder();
7722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                Hangul.decompose(c, buffer);
7732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return buffer.toString();
7742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(isDecompNoAlgorithmic(norm16)) {
7752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                decomp=c=mapAlgorithmic(c, norm16);
7762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                continue;
7772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
7782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // c decomposes, get everything from the variable-length extra data
7792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int length=extraData.charAt(norm16++)&MAPPING_LENGTH_MASK;
7802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return extraData.substring(norm16, norm16+length);
7812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(decomp<0) {
7832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return null;
7842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
7852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return UTF16.valueOf(decomp);
7862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
7912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Gets the raw decomposition for one code point.
7922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c code point
7932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return c's raw decomposition, if it has one; returns null if it does not have a decomposition
7942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
7952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public String getRawDecomposition(int c) {
7962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // We do not loop in this method because an algorithmic mapping itself
7972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // becomes a final result rather than having to be decomposed recursively.
7982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int norm16;
7992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(c<minDecompNoCP || isDecompYes(norm16=getNorm16(c))) {
8002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // c does not decompose
8012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return null;
8022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if(isHangul(norm16)) {
8032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Hangul syllable: decompose algorithmically
8042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            StringBuilder buffer=new StringBuilder();
8052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            Hangul.getRawDecomposition(c, buffer);
8062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return buffer.toString();
8072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if(isDecompNoAlgorithmic(norm16)) {
8082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return UTF16.valueOf(mapAlgorithmic(c, norm16));
8092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
8102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // c decomposes, get everything from the variable-length extra data
8112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int firstUnit=extraData.charAt(norm16);
8122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int mLength=firstUnit&MAPPING_LENGTH_MASK;  // length of normal mapping
8132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((firstUnit&MAPPING_HAS_RAW_MAPPING)!=0) {
8142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Read the raw mapping from before the firstUnit and before the optional ccc/lccc word.
8152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Bit 7=MAPPING_HAS_CCC_LCCC_WORD
8162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int rawMapping=norm16-((firstUnit>>7)&1)-1;
8172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                char rm0=extraData.charAt(rawMapping);
8182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(rm0<=MAPPING_LENGTH_MASK) {
8192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return extraData.substring(rawMapping-rm0, rawMapping);
8202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
8212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Copy the normal mapping and replace its first two code units with rm0.
8222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    StringBuilder buffer=new StringBuilder(mLength-1).append(rm0);
8232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    norm16+=1+2;  // skip over the firstUnit and the first two mapping code units
8242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return buffer.append(extraData, norm16, norm16+mLength-2).toString();
8252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
8262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
8272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                norm16+=1;  // skip over the firstUnit
8282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return extraData.substring(norm16, norm16+mLength);
8292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
8342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns true if code point c starts a canonical-iterator string segment.
8352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <b>{@link #ensureCanonIterData()} must have been called before this method,
8362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * or else this method will crash.</b>
8372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c A Unicode code point.
8382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return true if c starts a canonical-iterator string segment.
8392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
8402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean isCanonSegmentStarter(int c) {
8412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return canonIterData.get(c)>=0;
8422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
8442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns true if there are characters whose decomposition starts with c.
8452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If so, then the set is cleared and then filled with those characters.
8462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <b>{@link #ensureCanonIterData()} must have been called before this method,
8472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * or else this method will crash.</b>
8482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c A Unicode code point.
8492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param set A UnicodeSet to receive the characters whose decompositions
8502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *        start with c, if there are any.
8512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return true if there are characters whose decomposition starts with c.
8522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
8532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean getCanonStartSet(int c, UnicodeSet set) {
8542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int canonValue=canonIterData.get(c)&~CANON_NOT_SEGMENT_STARTER;
8552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(canonValue==0) {
8562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return false;
8572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        set.clear();
8592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int value=canonValue&CANON_VALUE_MASK;
8602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if((canonValue&CANON_HAS_SET)!=0) {
8612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            set.addAll(canonStartSets.get(value));
8622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if(value!=0) {
8632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            set.add(value);
8642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if((canonValue&CANON_HAS_COMPOSITIONS)!=0) {
8662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int norm16=getNorm16(c);
8672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(norm16==JAMO_L) {
8682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int syllable=Hangul.HANGUL_BASE+(c-Hangul.JAMO_L_BASE)*Hangul.JAMO_VT_COUNT;
8692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                set.add(syllable, syllable+Hangul.JAMO_VT_COUNT-1);
8702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
8712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                addComposites(getCompositionsList(norm16), set);
8722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
8752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int MIN_CCC_LCCC_CP=0x300;
8782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
    // norm16 values at or above MIN_YES_YES_WITH_CC are reported as "yes" by getCompQuickCheck().
8792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int MIN_YES_YES_WITH_CC=0xff01;
8802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int JAMO_VT=0xff00;
    // norm16 values at or above MIN_NORMAL_MAYBE_YES carry the combining class
    // in their low 8 bits (see getCC() and getCCFromYesOrMaybe()).
8812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int MIN_NORMAL_MAYBE_YES=0xfe00;
    // norm16 value for which getCanonStartSet() adds a whole range of Hangul syllables.
8822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int JAMO_L=1;
8832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int MAX_DELTA=0x40;
8842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
    // NOTE(review): the IX_* names below look like positions in the data file's
    // index array rather than byte offsets themselves - confirm against the loader.
8852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Byte offsets from the start of the data, after the generic header.
8862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IX_NORM_TRIE_OFFSET=0;
8872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IX_EXTRA_DATA_OFFSET=1;
8882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IX_SMALL_FCD_OFFSET=2;
8892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IX_RESERVED3_OFFSET=3;
8902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IX_TOTAL_SIZE=7;
8912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Code point thresholds for quick check codes.
8932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IX_MIN_DECOMP_NO_CP=8;
8942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IX_MIN_COMP_NO_MAYBE_CP=9;
8952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Norm16 value thresholds for quick check combinations and types of extra data.
8972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Mappings & compositions in [minYesNo..minYesNoMappingsOnly[.
8982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IX_MIN_YES_NO=10;
8992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IX_MIN_NO_NO=11;
9002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IX_LIMIT_NO_NO=12;
9012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IX_MIN_MAYBE_YES=13;
9022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Mappings only in [minYesNoMappingsOnly..minNoNo[.
9042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IX_MIN_YES_NO_MAPPINGS_ONLY=14;
9052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IX_COUNT=16;
9072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
    // Flags and fields in the first unit of a mapping stored in extraData:
    // 0x80: the unit before the first unit holds lccc in its upper byte (see getFCD16FromNormData()).
    // 0x40: a raw mapping is stored before the first unit (see getRawDecomposition()).
    // 0x1f: length of the mapping that follows the first unit.
9082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int MAPPING_HAS_CCC_LCCC_WORD=0x80;
9092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int MAPPING_HAS_RAW_MAPPING=0x40;
9102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int MAPPING_NO_COMP_BOUNDARY_AFTER=0x20;
9112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int MAPPING_LENGTH_MASK=0x1f;
9122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
    // NOTE(review): presumably the bit layout of the composition lists read by
    // getCompositionsList()/addComposites() - not verifiable from this section.
9132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int COMP_1_LAST_TUPLE=0x8000;
9142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int COMP_1_TRIPLE=1;
9152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int COMP_1_TRAIL_LIMIT=0x3400;
9162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int COMP_1_TRAIL_MASK=0x7ffe;
9172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int COMP_1_TRAIL_SHIFT=9;  // 10-1 for the "triple" bit
9182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int COMP_2_TRAIL_SHIFT=6;
9192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int COMP_2_TRAIL_MASK=0xffc0;
9202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // higher-level functionality ------------------------------------------ ***
9222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // NFD without an NFD Normalizer2 instance.
9242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public Appendable decompose(CharSequence s, StringBuilder dest) {
9252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        decompose(s, 0, s.length(), dest, s.length());
9262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return dest;
9272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
9292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Decomposes s[src, limit[ and writes the result to dest.
9302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * limit can be NULL if src is NUL-terminated.
9312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * destLengthEstimate is the initial dest buffer capacity and can be -1.
9322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
9332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public void decompose(CharSequence s, int src, int limit, StringBuilder dest,
9342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                   int destLengthEstimate) {
9352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(destLengthEstimate<0) {
9362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            destLengthEstimate=limit-src;
9372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        dest.setLength(0);
9392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ReorderingBuffer buffer=new ReorderingBuffer(this, dest, destLengthEstimate);
9402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        decompose(s, src, limit, buffer);
9412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Dual functionality:
9442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // buffer!=NULL: normalize
9452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes
9462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int decompose(CharSequence s, int src, int limit,
9472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                         ReorderingBuffer buffer) {
9482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int minNoCP=minDecompNoCP;
9492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int prevSrc;
9512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int c=0;
9522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int norm16=0;
9532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // only for quick check
9552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int prevBoundary=src;
9562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int prevCC=0;
9572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(;;) {
9592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // count code units below the minimum or with irrelevant data for the quick check
9602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for(prevSrc=src; src!=limit;) {
9612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if( (c=s.charAt(src))<minNoCP ||
9622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    isMostDecompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
9632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ) {
9642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ++src;
9652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if(!UTF16.isSurrogate((char)c)) {
9662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
9672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
9682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    char c2;
9692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(UTF16Plus.isSurrogateLead(c)) {
9702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) {
9712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            c=Character.toCodePoint((char)c, c2);
9722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
9732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else /* trail surrogate */ {
9742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) {
9752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            --src;
9762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            c=Character.toCodePoint(c2, (char)c);
9772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
9782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
9792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(isMostDecompYesAndZeroCC(norm16=getNorm16(c))) {
9802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        src+=Character.charCount(c);
9812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
9822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
9832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
9842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
9852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
9862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // copy these code units all at once
9872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(src!=prevSrc) {
9882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(buffer!=null) {
9892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.flushAndAppendZeroCC(s, prevSrc, src);
9902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
9912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    prevCC=0;
9922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    prevBoundary=src;
9932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
9942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
9952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(src==limit) {
9962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
9972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
9982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Check one above-minimum, relevant code point.
10002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            src+=Character.charCount(c);
10012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(buffer!=null) {
10022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                decompose(c, norm16, buffer);
10032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
10042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(isDecompYes(norm16)) {
10052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int cc=getCCFromYesOrMaybe(norm16);
10062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(prevCC<=cc || cc==0) {
10072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        prevCC=cc;
10082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if(cc<=1) {
10092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            prevBoundary=src;
10102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
10112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        continue;
10122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
10132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
10142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return prevBoundary;  // "no" or cc out of order
10152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
10162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return src;
10182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
10192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public void decomposeAndAppend(CharSequence s, boolean doDecompose, ReorderingBuffer buffer) {
10202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int limit=s.length();
10212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(limit==0) {
10222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return;
10232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(doDecompose) {
10252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            decompose(s, 0, limit, buffer);
10262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return;
10272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Just merge the strings at the boundary.
10292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int c=Character.codePointAt(s, 0);
10302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int src=0;
10312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int firstCC, prevCC, cc;
10322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        firstCC=prevCC=cc=getCC(getNorm16(c));
10332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(cc!=0) {
10342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            prevCC=cc;
10352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            src+=Character.charCount(c);
10362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(src>=limit) {
10372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
10382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
10392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            c=Character.codePointAt(s, src);
10402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            cc=getCC(getNorm16(c));
10412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        };
10422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer.append(s, 0, src, firstCC, prevCC);
10432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer.append(s, src, limit);
10442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
10452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Very similar to composeQuickCheck(): Make the same changes in both places if relevant.
10462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // doCompose: normalize
10472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // !doCompose: isNormalized (buffer must be empty and initialized)
    //
    // s, src, limit: the text and the [src..limit[ range of it that is processed.
    // onlyContiguous: selects the contiguous ("FCC") variant; it is consulted in the
    //   ccc!=0 branch below and passed through to recompose().
    // buffer: receives the output when doCompose; when !doCompose it is only used as
    //   scratch space for segments that need the decompose/recompose slow path.
    // Returns false only when !doCompose and the text is found not to be normalized;
    // every other path (including all doCompose paths) returns true.
10482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean compose(CharSequence s, int src, int limit,
10492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                           boolean onlyContiguous,
10502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                           boolean doCompose,
10512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                           ReorderingBuffer buffer) {
        // Code units below this threshold are accepted by the fast loop without a trie lookup.
10522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int minNoMaybeCP=minCompNoMaybeCP;
10532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /*
10552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * prevBoundary points to the last character before the current one
10562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * that has a composition boundary before it with ccc==0 and quick check "yes".
10572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Keeping track of prevBoundary saves us looking for a composition boundary
10582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * when we find a "no" or "maybe".
10592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         *
10602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * When we back out from prevSrc back to prevBoundary,
10612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * then we also remove those same characters (which had been simply copied
10622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * or canonically-order-inserted) from the ReorderingBuffer.
10632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Therefore, at all times, the [prevBoundary..prevSrc[ source units
10642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * must correspond 1:1 to destination units at the end of the destination buffer.
10652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
10662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int prevBoundary=src;
10672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int prevSrc;
10682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int c=0;
10692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int norm16=0;
10702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // only for isNormalized
10722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int prevCC=0;
10732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
        // Each iteration: (1) skip a run of "yes && ccc==0" text, (2) handle the one
        // character c that stopped the run, via a fast path or the slow path at the bottom.
10742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(;;) {
10752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // count code units below the minimum or with irrelevant data for the quick check
10762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for(prevSrc=src; src!=limit;) {
10772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if( (c=s.charAt(src))<minNoMaybeCP ||
10782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    isCompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
10792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ) {
10802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ++src;
10812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if(!UTF16.isSurrogate((char)c)) {
10822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
10832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
                    // c is a surrogate code unit: try to pair it with its neighbor so that
                    // the supplementary code point is looked up. An unpaired surrogate is
                    // looked up as-is by getNorm16(c) below.
10842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    char c2;
10852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(UTF16Plus.isSurrogateLead(c)) {
10862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) {
10872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            c=Character.toCodePoint((char)c, c2);
10882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
10892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else /* trail surrogate */ {
10902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) {
                            // Step back so that src points at the lead surrogate of the pair.
10912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            --src;
10922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            c=Character.toCodePoint(c2, (char)c);
10932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
10942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
10952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(isCompYesAndZeroCC(norm16=getNorm16(c))) {
10962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        src+=Character.charCount(c);
10972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
10982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
10992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
11002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
11012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
11022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // copy these code units all at once
11032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(src!=prevSrc) {
11042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(src==limit) {
11052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(doCompose) {
11062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer.flushAndAppendZeroCC(s, prevSrc, src);
11072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
11082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
11092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
11102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Set prevBoundary to the last character in the quick check loop.
11112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                prevBoundary=src-1;
                // If that last character is a surrogate pair, point at its lead surrogate.
11122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if( Character.isLowSurrogate(s.charAt(prevBoundary)) && prevSrc<prevBoundary &&
11132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    Character.isHighSurrogate(s.charAt(prevBoundary-1))
11142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ) {
11152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    --prevBoundary;
11162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
11172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(doCompose) {
11182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // The last "quick check yes" character is excluded from the
11192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // flush-and-append call in case it needs to be modified.
11202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.flushAndAppendZeroCC(s, prevSrc, prevBoundary);
11212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append(s, prevBoundary, src);
11222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
11232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    prevCC=0;
11242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
11252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // The start of the current character (c).
11262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                prevSrc=src;
11272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(src==limit) {
11282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
11292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
11302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
            // Step over c; prevSrc keeps pointing at its first code unit.
11312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            src+=Character.charCount(c);
11322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            /*
11332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             * isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
11342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward)
11352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             * or has ccc!=0.
11362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             * Check for Jamo V/T, then for regular characters.
11372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             * c is not a Hangul syllable or Jamo L because those have "yes" properties.
11382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             */
11392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(isJamoVT(norm16) && prevBoundary!=prevSrc) {
                // The code unit immediately before c.
11402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                char prev=s.charAt(prevSrc-1);
11412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                boolean needToDecompose=false;
11422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(c<Hangul.JAMO_T_BASE) {
11432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
                    // prev is a char, so the subtraction wraps around for values below
                    // JAMO_L_BASE and the single comparison that follows is a range check.
11442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    prev-=Hangul.JAMO_L_BASE;
11452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(prev<Hangul.JAMO_L_COUNT) {
11462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if(!doCompose) {
11472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            return false;
11482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
11492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        char syllable=(char)
11502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            (Hangul.HANGUL_BASE+
11512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                             (prev*Hangul.JAMO_V_COUNT+(c-Hangul.JAMO_V_BASE))*
11522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                             Hangul.JAMO_T_COUNT);
11532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        char t;
                        // Same wrap-around range-check idiom, applied to the next code unit.
11542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if(src!=limit && (t=(char)(s.charAt(src)-Hangul.JAMO_T_BASE))<Hangul.JAMO_T_COUNT) {
11552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            ++src;
11562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            syllable+=t;  // The next character was a Jamo T.
11572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            prevBoundary=src;
                            // NOTE(review): presumably overwrites the L that was appended
                            // last with the composed syllable - confirm in ReorderingBuffer.
11582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            buffer.setLastChar(syllable);
11592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            continue;
11602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
11612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // If we see L+V+x where x!=T then we drop to the slow path,
11622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // decompose and recompose.
11632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // This is to deal with NFKC finding normal L and V but a
11642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // compatibility variant of a T. We need to either fully compose that
11652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // combination here (which would complicate the code and may not work
11662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // with strange custom data) or use the slow path -- or else our replacing
11672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // two input characters (L+V) with one output character (LV syllable)
11682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // would violate the invariant that [prevBoundary..prevSrc[ has the same
11692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // length as what we appended to the buffer since prevBoundary.
11702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        needToDecompose=true;
11712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
11722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if(Hangul.isHangulWithoutJamoT(prev)) {
11732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // c is a Jamo Trailing consonant,
11742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // compose with previous Hangul LV that does not contain a Jamo T.
11752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(!doCompose) {
11762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        return false;
11772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
11782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.setLastChar((char)(prev+c-Hangul.JAMO_T_BASE));
11792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    prevBoundary=src;
11802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    continue;
11812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
11822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(!needToDecompose) {
11832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // The Jamo V/T did not compose into a Hangul syllable.
11842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(doCompose) {
11852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer.append((char)c);
11862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
11872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        prevCC=0;
11882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
11892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    continue;
11902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
                // needToDecompose: fall through to the general handling below.
11912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
11922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            /*
11932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             * Source buffer pointers:
11942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             *
11952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             *  all done      quick check   current char  not yet
11962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             *                "yes" but     (c)           processed
11972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             *                may combine
11982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             *                forward
11992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             * [-------------[-------------[-------------[-------------[
12002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             * |             |             |             |             |
12012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             * orig. src     prevBoundary  prevSrc       src           limit
12022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             *
12032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             *
12042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             * Destination buffer pointers inside the ReorderingBuffer:
12052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             *
12062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             *  all done      might take    not filled yet
12072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             *                characters for
12082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             *                reordering
12092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             * [-------------[-------------[-------------[
12102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             * |             |             |             |
12112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             * start         reorderStart  limit         |
12122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             *                             +remainingCap.+
12132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             */
12142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(norm16>=MIN_YES_YES_WITH_CC) {
12152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int cc=norm16&0xff;  // cc!=0
12162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if( onlyContiguous &&  // FCC
12172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    (doCompose ? buffer.getLastCC() : prevCC)==0 &&
12182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    prevBoundary<prevSrc &&
12192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // buffer.getLastCC()==0 && prevBoundary<prevSrc tell us that
12202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // [prevBoundary..prevSrc[ (which is exactly one character under these conditions)
12212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // passed the quick check "yes && ccc==0" test.
12222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Check whether the last character was a "yesYes" or a "yesNo".
12232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // If a "yesNo", then we get its trailing ccc from its
12242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // mapping and check for canonical order.
12252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // All other cases are ok.
12262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    getTrailCCFromCompYesAndZeroCC(s, prevBoundary, prevSrc)>cc
12272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ) {
12282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Fails FCD test, need to decompose and contiguously recompose.
12292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(!doCompose) {
12302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        return false;
12312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
12322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if(doCompose) {
12332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append(c, cc);
12342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    continue;
12352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if(prevCC<=cc) {
                    // isNormalized mode: combining classes are still in non-decreasing order.
12362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    prevCC=cc;
12372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    continue;
12382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
                    // isNormalized mode: combining class lower than the previous one.
12392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return false;
12402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
12412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(!doCompose && !isMaybeOrNonZeroCC(norm16)) {
12422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
12432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
12442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            /*
12462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             * Find appropriate boundaries around this character,
12472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             * decompose the source text from between the boundaries,
12482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             * and recompose it.
12492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             *
12502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             * We may need to remove the last few characters from the ReorderingBuffer
12512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             * to account for source text that was copied or appended
12522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             * but needs to take part in the recomposition.
12532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             */
12542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            /*
12562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             * Find the last composition boundary in [prevBoundary..src[.
12572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             * It is either the decomposition of the current character (at prevSrc),
12582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             * or prevBoundary.
12592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             */
12602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(hasCompBoundaryBefore(c, norm16)) {
12612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                prevBoundary=prevSrc;
12622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(doCompose) {
                // Take back the already-appended units for [prevBoundary..prevSrc[;
                // they are re-processed by decomposeShort() below.
12632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.removeSuffix(prevSrc-prevBoundary);
12642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
12652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Find the next composition boundary in [src..limit[ -
12672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // modifies src to point to the next starter.
12682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            src=findNextCompBoundary(s, src, limit);
12692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Decompose [prevBoundary..src[ into the buffer and then recompose that part of it.
12712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int recomposeStartIndex=buffer.length();
12722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            decomposeShort(s, prevBoundary, src, buffer);
12732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            recompose(buffer, recomposeStartIndex, onlyContiguous);
12742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(!doCompose) {
                // isNormalized mode: the buffer is scratch space here. The recomposed
                // segment must equal the source segment; afterwards the scratch content
                // is discarded (NOTE(review): remove() presumably empties the buffer).
12752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(!buffer.equals(s, prevBoundary, src)) {
12762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return false;
12772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
12782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.remove();
12792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                prevCC=0;
12802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
12812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
12822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Move to the next starter. We never need to look back before this point again.
12832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            prevBoundary=src;
12842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
12852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
12862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
12872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
12882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Very similar to compose(): Make the same changes in both places if relevant.
12892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * doSpan: spanQuickCheckYes (ignore bit 0 of the return value)
12902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * !doSpan: quickCheck
12912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return bits 31..1: spanQuickCheckYes (==s.length() if "yes") and
12922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *         bit 0: set if "maybe"; otherwise, if the span length&lt;s.length()
12932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *         then the quick check result is "no"
12942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
12952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int composeQuickCheck(CharSequence s, int src, int limit,
12962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                 boolean onlyContiguous, boolean doSpan) {
12972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int qcResult=0;
12982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int minNoMaybeCP=minCompNoMaybeCP;
12992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /*
13012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * prevBoundary points to the last character before the current one
13022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * that has a composition boundary before it with ccc==0 and quick check "yes".
13032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
13042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int prevBoundary=src;
13052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int prevSrc;
13062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int c=0;
13072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int norm16=0;
13082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int prevCC=0;
13092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(;;) {
13112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // count code units below the minimum or with irrelevant data for the quick check
13122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for(prevSrc=src;;) {
13132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(src==limit) {
13142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return (src<<1)|qcResult;  // "yes" or "maybe"
13152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
13162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if( (c=s.charAt(src))<minNoMaybeCP ||
13172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    isCompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
13182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ) {
13192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ++src;
13202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if(!UTF16.isSurrogate((char)c)) {
13212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
13222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
13232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    char c2;
13242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(UTF16Plus.isSurrogateLead(c)) {
13252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) {
13262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            c=Character.toCodePoint((char)c, c2);
13272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
13282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else /* trail surrogate */ {
13292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) {
13302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            --src;
13312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            c=Character.toCodePoint(c2, (char)c);
13322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
13332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
13342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(isCompYesAndZeroCC(norm16=getNorm16(c))) {
13352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        src+=Character.charCount(c);
13362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
13372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
13382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
13392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
13402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
13412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(src!=prevSrc) {
13422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Set prevBoundary to the last character in the quick check loop.
13432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                prevBoundary=src-1;
13442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if( Character.isLowSurrogate(s.charAt(prevBoundary)) && prevSrc<prevBoundary &&
13452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        Character.isHighSurrogate(s.charAt(prevBoundary-1))
13462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ) {
13472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    --prevBoundary;
13482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
13492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                prevCC=0;
13502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // The start of the current character (c).
13512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                prevSrc=src;
13522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
13532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            src+=Character.charCount(c);
13552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            /*
13562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             * isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
13572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward)
13582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             * or has ccc!=0.
13592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller             */
13602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(isMaybeOrNonZeroCC(norm16)) {
13612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int cc=getCCFromYesOrMaybe(norm16);
13622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if( onlyContiguous &&  // FCC
13632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    cc!=0 &&
13642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    prevCC==0 &&
13652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    prevBoundary<prevSrc &&
13662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // prevCC==0 && prevBoundary<prevSrc tell us that
13672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // [prevBoundary..prevSrc[ (which is exactly one character under these conditions)
13682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // passed the quick check "yes && ccc==0" test.
13692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Check whether the last character was a "yesYes" or a "yesNo".
13702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // If a "yesNo", then we get its trailing ccc from its
13712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // mapping and check for canonical order.
13722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // All other cases are ok.
13732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    getTrailCCFromCompYesAndZeroCC(s, prevBoundary, prevSrc)>cc
13742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ) {
13752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Fails FCD test.
13762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if(prevCC<=cc || cc==0) {
13772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    prevCC=cc;
13782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(norm16<MIN_YES_YES_WITH_CC) {
13792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if(!doSpan) {
13802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            qcResult=1;
13812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        } else {
13822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            return prevBoundary<<1;  // spanYes does not care to know it's "maybe"
13832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
13842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
13852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    continue;
13862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
13872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
13882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return prevBoundary<<1;  // "no"
13892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
13902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
13912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public void composeAndAppend(CharSequence s,
13922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                 boolean doCompose,
13932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                 boolean onlyContiguous,
13942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                 ReorderingBuffer buffer) {
13952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int src=0, limit=s.length();
13962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(!buffer.isEmpty()) {
13972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int firstStarterInSrc=findNextCompBoundary(s, 0, limit);
13982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(0!=firstStarterInSrc) {
13992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int lastStarterInDest=findPreviousCompBoundary(buffer.getStringBuilder(),
14002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                               buffer.length());
14012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                StringBuilder middle=new StringBuilder((buffer.length()-lastStarterInDest)+
14022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                       firstStarterInSrc+16);
14032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                middle.append(buffer.getStringBuilder(), lastStarterInDest, buffer.length());
14042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.removeSuffix(buffer.length()-lastStarterInDest);
14052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                middle.append(s, 0, firstStarterInSrc);
14062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                compose(middle, 0, middle.length(), onlyContiguous, true, buffer);
14072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                src=firstStarterInSrc;
14082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
14092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
14102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(doCompose) {
14112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            compose(s, src, limit, onlyContiguous, true, buffer);
14122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
14132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            buffer.append(s, src, limit);
14142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
14152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Dual functionality:
14172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // buffer!=NULL: normalize
14182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes
14192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int makeFCD(CharSequence s, int src, int limit, ReorderingBuffer buffer) {
14202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Note: In this function we use buffer->appendZeroCC() because we track
14212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // the lead and trail combining classes here, rather than leaving it to
14222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // the ReorderingBuffer.
14232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // The exception is the call to decomposeShort() which uses the buffer
14242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // in the normal way.
14252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1.
14272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Similar to the prevBoundary in the compose() implementation.
14282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int prevBoundary=src;
14292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int prevSrc;
14302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int c=0;
14312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int prevFCD16=0;
14322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int fcd16=0;
14332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(;;) {
14352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // count code units with lccc==0
14362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for(prevSrc=src; src!=limit;) {
14372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if((c=s.charAt(src))<MIN_CCC_LCCC_CP) {
14382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    prevFCD16=~c;
14392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ++src;
14402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if(!singleLeadMightHaveNonZeroFCD16(c)) {
14412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    prevFCD16=0;
14422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ++src;
14432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
14442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(UTF16.isSurrogate((char)c)) {
14452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        char c2;
14462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if(UTF16Plus.isSurrogateLead(c)) {
14472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) {
14482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                c=Character.toCodePoint((char)c, c2);
14492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            }
14502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        } else /* trail surrogate */ {
14512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) {
14522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                --src;
14532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                c=Character.toCodePoint(c2, (char)c);
14542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            }
14552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
14562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
14572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if((fcd16=getFCD16FromNormData(c))<=0xff) {
14582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        prevFCD16=fcd16;
14592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        src+=Character.charCount(c);
14602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
14612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
14622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
14632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
14642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
14652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // copy these code units all at once
14662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(src!=prevSrc) {
14672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(src==limit) {
14682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(buffer!=null) {
14692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer.flushAndAppendZeroCC(s, prevSrc, src);
14702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
14712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
14722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
14732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                prevBoundary=src;
14742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // We know that the previous character's lccc==0.
14752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(prevFCD16<0) {
14762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Fetching the fcd16 value was deferred for this below-U+0300 code point.
14772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int prev=~prevFCD16;
14782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    prevFCD16= prev<0x180 ? tccc180[prev] : getFCD16FromNormData(prev);
14792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(prevFCD16>1) {
14802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        --prevBoundary;
14812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
14822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
14832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int p=src-1;
14842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if( Character.isLowSurrogate(s.charAt(p)) && prevSrc<p &&
14852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        Character.isHighSurrogate(s.charAt(p-1))
14862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ) {
14872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        --p;
14882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // Need to fetch the previous character's FCD value because
14892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // prevFCD16 was just for the trail surrogate code point.
14902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        prevFCD16=getFCD16FromNormData(Character.toCodePoint(s.charAt(p), s.charAt(p+1)));
14912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // Still known to have lccc==0 because its lead surrogate unit had lccc==0.
14922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
14932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(prevFCD16>1) {
14942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        prevBoundary=p;
14952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
14962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
14972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(buffer!=null) {
14982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // The last lccc==0 character is excluded from the
14992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // flush-and-append call in case it needs to be modified.
15002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.flushAndAppendZeroCC(s, prevSrc, prevBoundary);
15012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append(s, prevBoundary, src);
15022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
15032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // The start of the current character (c).
15042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                prevSrc=src;
15052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(src==limit) {
15062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
15072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
15082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
15092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            src+=Character.charCount(c);
15102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // The current character (c) at [prevSrc..src[ has a non-zero lead combining class.
15112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Check for proper order, and decompose locally if necessary.
15122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((prevFCD16&0xff)<=(fcd16>>8)) {
15132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // proper order: prev tccc <= current lccc
15142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if((fcd16&0xff)<=1) {
15152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    prevBoundary=src;
15162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
15172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(buffer!=null) {
15182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.appendZeroCC(c);
15192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
15202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                prevFCD16=fcd16;
15212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                continue;
15222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(buffer==null) {
15232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return prevBoundary;  // quick check "no"
15242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
15252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                /*
15262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 * Back out the part of the source that we copied or appended
15272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 * already but is now going to be decomposed.
15282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 * prevSrc is set to after what was copied/appended.
15292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 */
15302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.removeSuffix(prevSrc-prevBoundary);
15312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                /*
15322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 * Find the part of the source that needs to be decomposed,
15332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 * up to the next safe boundary.
15342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 */
15352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                src=findNextFCDBoundary(s, src, limit);
15362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                /*
15372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 * The source text does not fulfill the conditions for FCD.
15382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 * Decompose and reorder a limited piece of the text.
15392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 */
15402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                decomposeShort(s, prevBoundary, src, buffer);
15412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                prevBoundary=src;
15422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                prevFCD16=0;
15432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
15442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
15452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return src;
15462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
15472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public void makeFCDAndAppend(CharSequence s, boolean doMakeFCD, ReorderingBuffer buffer) {
15482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int src=0, limit=s.length();
15492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(!buffer.isEmpty()) {
15502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int firstBoundaryInSrc=findNextFCDBoundary(s, 0, limit);
15512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(0!=firstBoundaryInSrc) {
15522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int lastBoundaryInDest=findPreviousFCDBoundary(buffer.getStringBuilder(),
15532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                               buffer.length());
15542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                StringBuilder middle=new StringBuilder((buffer.length()-lastBoundaryInDest)+
15552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                       firstBoundaryInSrc+16);
15562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                middle.append(buffer.getStringBuilder(), lastBoundaryInDest, buffer.length());
15572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.removeSuffix(buffer.length()-lastBoundaryInDest);
15582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                middle.append(s, 0, firstBoundaryInSrc);
15592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                makeFCD(middle, 0, middle.length(), buffer);
15602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                src=firstBoundaryInSrc;
15612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
15622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
15632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(doMakeFCD) {
15642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            makeFCD(s, src, limit, buffer);
15652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
15662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            buffer.append(s, src, limit);
15672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
15682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
15692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
15702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Note: hasDecompBoundary() could be implemented as aliases to
15712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // hasFCDBoundaryBefore() and hasFCDBoundaryAfter()
15722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // at the cost of building the FCD trie for a decomposition normalizer.
15732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean hasDecompBoundary(int c, boolean before) {
15742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(;;) {
15752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(c<minDecompNoCP) {
15762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return true;
15772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
15782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int norm16=getNorm16(c);
15792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(isHangul(norm16) || isDecompYesAndZeroCC(norm16)) {
15802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return true;
15812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(norm16>MIN_NORMAL_MAYBE_YES) {
15822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;  // ccc!=0
15832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(isDecompNoAlgorithmic(norm16)) {
15842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c=mapAlgorithmic(c, norm16);
15852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
15862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // c decomposes, get everything from the variable-length extra data
15872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int firstUnit=extraData.charAt(norm16);
15882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if((firstUnit&MAPPING_LENGTH_MASK)==0) {
15892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return false;
15902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
15912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(!before) {
15922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // decomp after-boundary: same as hasFCDBoundaryAfter(),
15932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // fcd16<=1 || trailCC==0
15942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(firstUnit>0x1ff) {
15952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        return false;  // trailCC>1
15962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
15972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(firstUnit<=0xff) {
15982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        return true;  // trailCC==0
15992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
16002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // if(trailCC==1) test leadCC==0, same as checking for before-boundary
16012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
16022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // true if leadCC==0 (hasFCDBoundaryBefore())
16032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (extraData.charAt(norm16-1)&0xff00)==0;
16042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
16052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
16062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
16072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean isDecompInert(int c) { return isDecompYesAndZeroCC(getNorm16(c)); }
16082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
16092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean hasCompBoundaryBefore(int c) {
16102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return c<minCompNoMaybeCP || hasCompBoundaryBefore(c, getNorm16(c));
16112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
16122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean hasCompBoundaryAfter(int c, boolean onlyContiguous, boolean testInert) {
16132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(;;) {
16142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int norm16=getNorm16(c);
16152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(isInert(norm16)) {
16162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return true;
16172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(norm16<=minYesNo) {
16182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Hangul: norm16==minYesNo
16192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Hangul LVT has a boundary after it.
16202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Hangul LV and non-inert yesYes characters combine forward.
16212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return isHangul(norm16) && !Hangul.isHangulWithoutJamoT((char)c);
16222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(norm16>= (testInert ? minNoNo : minMaybeYes)) {
16232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
16242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(isDecompNoAlgorithmic(norm16)) {
16252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c=mapAlgorithmic(c, norm16);
16262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
16272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // c decomposes, get everything from the variable-length extra data.
16282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // If testInert, then c must be a yesNo character which has lccc=0,
16292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // otherwise it could be a noNo.
16302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int firstUnit=extraData.charAt(norm16);
16312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // true if
16322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //   not MAPPING_NO_COMP_BOUNDARY_AFTER
16332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //     (which is set if
16342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //       c is not deleted, and
16352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //       it and its decomposition do not combine forward, and it has a starter)
16362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //   and if FCC then trailCC<=1
16372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return
16382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    (firstUnit&MAPPING_NO_COMP_BOUNDARY_AFTER)==0 &&
16392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    (!onlyContiguous || firstUnit<=0x1ff);
16402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
16412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
16422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
16432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
16442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean hasFCDBoundaryBefore(int c) { return c<MIN_CCC_LCCC_CP || getFCD16(c)<=0xff; }
16452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean hasFCDBoundaryAfter(int c) {
16462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int fcd16=getFCD16(c);
16472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return fcd16<=1 || (fcd16&0xff)==0;
16482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
16492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean isFCDInert(int c) { return getFCD16(c)<=1; }
16502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
16512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private boolean isMaybe(int norm16) { return minMaybeYes<=norm16 && norm16<=JAMO_VT; }
16522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private boolean isMaybeOrNonZeroCC(int norm16) { return norm16>=minMaybeYes; }
16532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static boolean isInert(int norm16) { return norm16==0; }
16542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static boolean isJamoL(int norm16) { return norm16==1; }
16552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static boolean isJamoVT(int norm16) { return norm16==JAMO_VT; }
16562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private boolean isHangul(int norm16) { return norm16==minYesNo; }
16572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private boolean isCompYesAndZeroCC(int norm16) { return norm16<minNoNo; }
16582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // UBool isCompYes(uint16_t norm16) const {
16592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo;
16602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // }
16612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // UBool isCompYesOrMaybe(uint16_t norm16) const {
16622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     return norm16<minNoNo || minMaybeYes<=norm16;
16632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // }
16642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // private boolean hasZeroCCFromDecompYes(int norm16) {
16652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     return norm16<=MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
16662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // }
16672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private boolean isDecompYesAndZeroCC(int norm16) {
16682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return norm16<minYesNo ||
16692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller               norm16==JAMO_VT ||
16702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller               (minMaybeYes<=norm16 && norm16<=MIN_NORMAL_MAYBE_YES);
16712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
16722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
16732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * A little faster and simpler than isDecompYesAndZeroCC() but does not include
16742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the MaybeYes which combine-forward and have ccc=0.
16752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * (Standard Unicode 5.2 normalization does not have such characters.)
16762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
16772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private boolean isMostDecompYesAndZeroCC(int norm16) {
16782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return norm16<minYesNo || norm16==MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
16792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
16802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private boolean isDecompNoAlgorithmic(int norm16) { return norm16>=limitNoNo; }
16812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
16822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // For use with isCompYes().
16832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC.
16842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // static uint8_t getCCFromYes(uint16_t norm16) {
16852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     return norm16>=MIN_YES_YES_WITH_CC ? (uint8_t)norm16 : 0;
16862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // }
16872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int getCCFromNoNo(int norm16) {
16882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if((extraData.charAt(norm16)&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
16892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return extraData.charAt(norm16-1)&0xff;
16902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
16912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 0;
16922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
16932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
16942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // requires that the [cpStart..cpLimit[ character passes isCompYesAndZeroCC()
16952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    int getTrailCCFromCompYesAndZeroCC(CharSequence s, int cpStart, int cpLimit) {
16962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int c;
16972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(cpStart==(cpLimit-1)) {
16982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            c=s.charAt(cpStart);
16992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
17002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            c=Character.codePointAt(s, cpStart);
17012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
17022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int prevNorm16=getNorm16(c);
17032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(prevNorm16<=minYesNo) {
17042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 0;  // yesYes and Hangul LV/LVT have ccc=tccc=0
17052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
17062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return extraData.charAt(prevNorm16)>>8;  // tccc from yesNo
17072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
17082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
17092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Requires algorithmic-NoNo.
17112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int mapAlgorithmic(int c, int norm16) {
17122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return c+norm16-(minMaybeYes-MAX_DELTA-1);
17132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
17142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Requires minYesNo<norm16<limitNoNo.
17162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // private int getMapping(int norm16) { return /*extraData+*/norm16; }
17172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
17192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return index into maybeYesCompositions, or -1
17202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
17212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int getCompositionsListForDecompYes(int norm16) {
17222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(norm16==0 || MIN_NORMAL_MAYBE_YES<=norm16) {
17232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return -1;
17242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
17252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((norm16-=minMaybeYes)<0) {
17262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // norm16<minMaybeYes: index into extraData which is a substring at
17272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //     maybeYesCompositions[MIN_NORMAL_MAYBE_YES-minMaybeYes]
17282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // same as (MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16
17292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                norm16+=MIN_NORMAL_MAYBE_YES;  // for yesYes; if Jamo L: harmless empty list
17302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
17312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return norm16;
17322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
17332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
17342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
17352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return index into maybeYesCompositions
17362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
17372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int getCompositionsListForComposite(int norm16) {
17382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // composite has both mapping & compositions list
17392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int firstUnit=extraData.charAt(norm16);
17402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16+  // mapping in maybeYesCompositions
17412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            1+  // +1 to skip the first unit with the mapping lenth
17422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            (firstUnit&MAPPING_LENGTH_MASK);  // + mapping length
17432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
17442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
17452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c code point must have compositions
17462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return index into maybeYesCompositions
17472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
17482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int getCompositionsList(int norm16) {
17492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return isDecompYes(norm16) ?
17502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                getCompositionsListForDecompYes(norm16) :
17512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                getCompositionsListForComposite(norm16);
17522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
17532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Decompose a short piece of text which is likely to contain characters that
17552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // fail the quick check loop and/or where the quick check loop's overhead
17562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // is unlikely to be amortized.
17572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Called by the compose() and makeFCD() implementations.
17582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Public in Java for collation implementation code.
17592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public void decomposeShort(CharSequence s, int src, int limit,
17602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                               ReorderingBuffer buffer) {
17612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(src<limit) {
17622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int c=Character.codePointAt(s, src);
17632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            src+=Character.charCount(c);
17642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            decompose(c, getNorm16(c), buffer);
17652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
17662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
17672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void decompose(int c, int norm16,
17682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                           ReorderingBuffer buffer) {
17692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Only loops for 1:1 algorithmic mappings.
17702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(;;) {
17712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // get the decomposition and the lead and trail cc's
17722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(isDecompYes(norm16)) {
17732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // c does not decompose
17742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.append(c, getCCFromYesOrMaybe(norm16));
17752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(isHangul(norm16)) {
17762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Hangul syllable: decompose algorithmically
17772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                Hangul.decompose(c, buffer);
17782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(isDecompNoAlgorithmic(norm16)) {
17792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c=mapAlgorithmic(c, norm16);
17802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                norm16=getNorm16(c);
17812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                continue;
17822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
17832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // c decomposes, get everything from the variable-length extra data
17842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int firstUnit=extraData.charAt(norm16);
17852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int length=firstUnit&MAPPING_LENGTH_MASK;
17862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int leadCC, trailCC;
17872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                trailCC=firstUnit>>8;
17882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
17892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    leadCC=extraData.charAt(norm16-1)>>8;
17902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
17912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    leadCC=0;
17922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
17932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ++norm16;  // skip over the firstUnit
17942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.append(extraData, norm16, norm16+length, leadCC, trailCC);
17952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
17962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return;
17972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
17982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
17992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
18002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
18012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Finds the recomposition result for
18022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * a forward-combining "lead" character,
18032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * specified with a pointer to its compositions list,
18042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * and a backward-combining "trail" character.
18052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
18062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <p>If the lead and trail characters combine, then this function returns
18072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the following "compositeAndFwd" value:
18082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <pre>
18092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Bits 21..1  composite character
18102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Bit      0  set if the composite is a forward-combining starter
18112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * </pre>
18122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * otherwise it returns -1.
18132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
18142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <p>The compositions list has (trail, compositeAndFwd) pair entries,
18152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * encoded as either pairs or triples of 16-bit units.
18162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The last entry has the high bit of its first unit set.
18172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
18182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <p>The list is sorted by ascending trail characters (there are no duplicates).
18192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * A linear search is used.
18202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
18212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <p>See normalizer2impl.h for a more detailed description
18222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * of the compositions list format.
18232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
18242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static int combine(String compositions, int list, int trail) {
18252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int key1, firstUnit;
18262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(trail<COMP_1_TRAIL_LIMIT) {
18272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // trail character is 0..33FF
18282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // result entry may have 2 or 3 units
18292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            key1=(trail<<1);
18302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while(key1>(firstUnit=compositions.charAt(list))) {
18312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                list+=2+(firstUnit&COMP_1_TRIPLE);
18322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
18332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
18342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if((firstUnit&COMP_1_TRIPLE)!=0) {
18352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return ((int)compositions.charAt(list+1)<<16)|compositions.charAt(list+2);
18362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
18372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return compositions.charAt(list+1);
18382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
18392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
18402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
18412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // trail character is 3400..10FFFF
18422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // result entry has 3 units
18432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            key1=COMP_1_TRAIL_LIMIT+(((trail>>COMP_1_TRAIL_SHIFT))&~COMP_1_TRIPLE);
18442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int key2=(trail<<COMP_2_TRAIL_SHIFT)&0xffff;
18452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int secondUnit;
18462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for(;;) {
18472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(key1>(firstUnit=compositions.charAt(list))) {
18482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    list+=2+(firstUnit&COMP_1_TRIPLE);
18492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
18502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(key2>(secondUnit=compositions.charAt(list+1))) {
18512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if((firstUnit&COMP_1_LAST_TUPLE)!=0) {
18522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            break;
18532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        } else {
18542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            list+=3;
18552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
18562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else if(key2==(secondUnit&COMP_2_TRAIL_MASK)) {
18572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        return ((secondUnit&~COMP_2_TRAIL_MASK)<<16)|compositions.charAt(list+2);
18582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
18592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
18602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
18612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
18622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
18632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
18642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
18652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
18662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return -1;
18672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
18682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
18692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param list some character's compositions list
18702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param set recursively receives the composites from these compositions
18712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
18722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void addComposites(int list, UnicodeSet set) {
18732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int firstUnit, compositeAndFwd;
18742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        do {
18752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            firstUnit=maybeYesCompositions.charAt(list);
18762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((firstUnit&COMP_1_TRIPLE)==0) {
18772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                compositeAndFwd=maybeYesCompositions.charAt(list+1);
18782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                list+=2;
18792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
18802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                compositeAndFwd=(((int)maybeYesCompositions.charAt(list+1)&~COMP_2_TRAIL_MASK)<<16)|
18812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                maybeYesCompositions.charAt(list+2);
18822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                list+=3;
18832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
18842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int composite=compositeAndFwd>>1;
18852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((compositeAndFwd&1)!=0) {
18862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                addComposites(getCompositionsListForComposite(getNorm16(composite)), set);
18872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
18882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            set.add(composite);
18892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } while((firstUnit&COMP_1_LAST_TUPLE)==0);
18902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
18912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /*
18922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Recomposes the buffer text starting at recomposeStartIndex
18932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * (which is in NFD - decomposed and canonically ordered),
18942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * and truncates the buffer contents.
18952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
18962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Note that recomposition never lengthens the text:
18972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Any character consists of either one or two code units;
18982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * a composition may contain at most one more code unit than the original starter,
18992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * while the combining mark that is removed has at least one code unit.
19002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
19012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void recompose(ReorderingBuffer buffer, int recomposeStartIndex,
19022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                           boolean onlyContiguous) {
19032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder sb=buffer.getStringBuilder();
19042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int p=recomposeStartIndex;
19052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(p==sb.length()) {
19062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return;
19072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
19082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
19092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int starter, pRemove;
19102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int compositionsList;
19112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int c, compositeAndFwd;
19122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int norm16;
19132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int cc, prevCC;
19142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean starterIsSupplementary;
19152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
19162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Some of the following variables are not used until we have a forward-combining starter
19172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // and are only initialized now to avoid compiler warnings.
19182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        compositionsList=-1;  // used as indicator for whether we have a forward-combining starter
19192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        starter=-1;
19202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        starterIsSupplementary=false;
19212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        prevCC=0;
19222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
19232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(;;) {
19242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            c=sb.codePointAt(p);
19252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            p+=Character.charCount(c);
19262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            norm16=getNorm16(c);
19272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            cc=getCCFromYesOrMaybe(norm16);
19282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if( // this character combines backward and
19292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                isMaybe(norm16) &&
19302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // we have seen a starter that combines forward and
19312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                compositionsList>=0 &&
19322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // the backward-combining character is not blocked
19332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                (prevCC<cc || prevCC==0)
19342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ) {
19352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(isJamoVT(norm16)) {
19362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // c is a Jamo V/T, see if we can compose it with the previous character.
19372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(c<Hangul.JAMO_T_BASE) {
19382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
19392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        char prev=(char)(sb.charAt(starter)-Hangul.JAMO_L_BASE);
19402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if(prev<Hangul.JAMO_L_COUNT) {
19412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            pRemove=p-1;
19422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            char syllable=(char)
19432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                (Hangul.HANGUL_BASE+
19442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                 (prev*Hangul.JAMO_V_COUNT+(c-Hangul.JAMO_V_BASE))*
19452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                 Hangul.JAMO_T_COUNT);
19462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            char t;
19472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            if(p!=sb.length() && (t=(char)(sb.charAt(p)-Hangul.JAMO_T_BASE))<Hangul.JAMO_T_COUNT) {
19482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                ++p;
19492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                syllable+=t;  // The next character was a Jamo T.
19502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            }
19512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            sb.setCharAt(starter, syllable);
19522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // remove the Jamo V/T
19532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            sb.delete(pRemove, p);
19542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            p=pRemove;
19552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
19562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
19572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    /*
19582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                     * No "else" for Jamo T:
19592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                     * Since the input is in NFD, there are no Hangul LV syllables that
19602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                     * a Jamo T could combine with.
19612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                     * All Jamo Ts are combined above when handling Jamo Vs.
19622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                     */
19632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(p==sb.length()) {
19642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
19652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
19662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    compositionsList=-1;
19672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    continue;
19682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if((compositeAndFwd=combine(maybeYesCompositions, compositionsList, c))>=0) {
19692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // The starter and the combining mark (c) do combine.
19702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int composite=compositeAndFwd>>1;
19712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
19722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Remove the combining mark.
19732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    pRemove=p-Character.charCount(c);  // pRemove & p: start & limit of the combining mark
19742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    sb.delete(pRemove, p);
19752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    p=pRemove;
19762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Replace the starter with the composite.
19772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(starterIsSupplementary) {
19782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if(composite>0xffff) {
19792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // both are supplementary
19802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            sb.setCharAt(starter, UTF16.getLeadSurrogate(composite));
19812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            sb.setCharAt(starter+1, UTF16.getTrailSurrogate(composite));
19822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        } else {
19832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            sb.setCharAt(starter, (char)c);
19842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            sb.deleteCharAt(starter+1);
19852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // The composite is shorter than the starter,
19862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // move the intermediate characters forward one.
19872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            starterIsSupplementary=false;
19882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            --p;
19892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
19902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else if(composite>0xffff) {
19912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // The composite is longer than the starter,
19922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // move the intermediate characters back one.
19932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        starterIsSupplementary=true;
19942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        sb.setCharAt(starter, UTF16.getLeadSurrogate(composite));
19952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        sb.insert(starter+1, UTF16.getTrailSurrogate(composite));
19962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        ++p;
19972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
19982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // both are on the BMP
19992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        sb.setCharAt(starter, (char)composite);
20002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
20012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
20022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Keep prevCC because we removed the combining mark.
20032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
20042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(p==sb.length()) {
20052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
20062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
20072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Is the composite a starter that combines forward?
20082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if((compositeAndFwd&1)!=0) {
20092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        compositionsList=
20102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            getCompositionsListForComposite(getNorm16(composite));
20112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
20122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        compositionsList=-1;
20132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
20142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
20152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // We combined; continue with looking for compositions.
20162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    continue;
20172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
20182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
20192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
20202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // no combination this time
20212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            prevCC=cc;
20222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(p==sb.length()) {
20232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
20242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
20252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
20262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // If c did not combine, then check if it is a starter.
20272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(cc==0) {
20282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Found a new starter.
20292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if((compositionsList=getCompositionsListForDecompYes(norm16))>=0) {
20302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // It may combine with something, prepare for it.
20312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(c<=0xffff) {
20322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        starterIsSupplementary=false;
20332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        starter=p-1;
20342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
20352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        starterIsSupplementary=true;
20362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        starter=p-2;
20372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
20382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
20392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(onlyContiguous) {
20402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // FCC: no discontiguous compositions; any intervening character blocks.
20412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                compositionsList=-1;
20422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
20432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
20442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer.flush();
20452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
20462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
20472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int composePair(int a, int b) {
20482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int norm16=getNorm16(a);  // maps an out-of-range 'a' to inert norm16=0
20492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int list;
20502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(isInert(norm16)) {
20512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return -1;
20522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if(norm16<minYesNoMappingsOnly) {
20532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(isJamoL(norm16)) {
20542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                b-=Hangul.JAMO_V_BASE;
20552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(0<=b && b<Hangul.JAMO_V_COUNT) {
20562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return
20572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        (Hangul.HANGUL_BASE+
20582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                         ((a-Hangul.JAMO_L_BASE)*Hangul.JAMO_V_COUNT+b)*
20592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                         Hangul.JAMO_T_COUNT);
20602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
20612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return -1;
20622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
20632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(isHangul(norm16)) {
20642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                b-=Hangul.JAMO_T_BASE;
20652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(Hangul.isHangulWithoutJamoT((char)a) && 0<b && b<Hangul.JAMO_T_COUNT) {  // not b==0!
20662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return a+b;
20672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
20682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return -1;
20692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
20702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
20712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // 'a' has a compositions list in extraData
20722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                list=norm16;
20732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(norm16>minYesNo) {  // composite 'a' has both mapping & compositions list
20742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    list+=  // mapping pointer
20752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        1+  // +1 to skip the first unit with the mapping lenth
20762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        (extraData.charAt(list)&MAPPING_LENGTH_MASK);  // + mapping length
20772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
20782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Turn the offset-into-extraData into an offset-into-maybeYesCompositions.
20792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                list+=MIN_NORMAL_MAYBE_YES-minMaybeYes;
20802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
20812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if(norm16<minMaybeYes || MIN_NORMAL_MAYBE_YES<=norm16) {
20822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return -1;
20832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
20842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            list=norm16-minMaybeYes;  // offset into maybeYesCompositions
20852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
20862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(b<0 || 0x10ffff<b) {  // combine(list, b) requires a valid code point b
20872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return -1;
20882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
20892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return combine(maybeYesCompositions, list, b)>>1;
20902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
20912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
20922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
20932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Does c have a composition boundary before it?
20942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * True if its decomposition begins with a character that has
20952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * ccc=0 && NFC_QC=Yes (isCompYesAndZeroCC()).
20962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * As a shortcut, this is true if c itself has ccc=0 && NFC_QC=Yes
20972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * (isCompYesAndZeroCC()) so we need not decompose.
20982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
20992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private boolean hasCompBoundaryBefore(int c, int norm16) {
21002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(;;) {
21012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(isCompYesAndZeroCC(norm16)) {
21022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return true;
21032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(isMaybeOrNonZeroCC(norm16)) {
21042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
21052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(isDecompNoAlgorithmic(norm16)) {
21062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c=mapAlgorithmic(c, norm16);
21072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                norm16=getNorm16(c);
21082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
21092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // c decomposes, get everything from the variable-length extra data
21102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int firstUnit=extraData.charAt(norm16);
21112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if((firstUnit&MAPPING_LENGTH_MASK)==0) {
21122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return false;
21132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
21142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0 && (extraData.charAt(norm16-1)&0xff00)!=0) {
21152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return false;  // non-zero leadCC
21162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
21172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return isCompYesAndZeroCC(getNorm16(Character.codePointAt(extraData, norm16+1)));
21182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
21192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
21202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
21212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int findPreviousCompBoundary(CharSequence s, int p) {
21222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(p>0) {
21232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int c=Character.codePointBefore(s, p);
21242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            p-=Character.charCount(c);
21252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(hasCompBoundaryBefore(c)) {
21262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
21272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
21282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // We could also test hasCompBoundaryAfter() and return iter.codePointLimit,
21292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // but that's probably not worth the extra cost.
21302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
21312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return p;
21322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
21332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int findNextCompBoundary(CharSequence s, int p, int limit) {
21342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(p<limit) {
21352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int c=Character.codePointAt(s, p);
21362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int norm16=normTrie.get(c);
21372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(hasCompBoundaryBefore(c, norm16)) {
21382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
21392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
21402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            p+=Character.charCount(c);
21412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
21422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return p;
21432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
21442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
21452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int findPreviousFCDBoundary(CharSequence s, int p) {
21462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(p>0) {
21472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int c=Character.codePointBefore(s, p);
21482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            p-=Character.charCount(c);
21492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(c<MIN_CCC_LCCC_CP || getFCD16(c)<=0xff) {
21502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
21512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
21522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
21532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return p;
21542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
21552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int findNextFCDBoundary(CharSequence s, int p, int limit) {
21562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(p<limit) {
21572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int c=Character.codePointAt(s, p);
21582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(c<MIN_CCC_LCCC_CP || getFCD16(c)<=0xff) {
21592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
21602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
21612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            p+=Character.charCount(c);
21622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
21632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return p;
21642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
21652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
21662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void addToStartSet(Trie2Writable newData, int origin, int decompLead) {
21672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int canonValue=newData.get(decompLead);
21682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if((canonValue&(CANON_HAS_SET|CANON_VALUE_MASK))==0 && origin!=0) {
21692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // origin is the first character whose decomposition starts with
21702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // the character for which we are setting the value.
21712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            newData.set(decompLead, canonValue|origin);
21722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
21732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // origin is not the first character, or it is U+0000.
21742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            UnicodeSet set;
21752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((canonValue&CANON_HAS_SET)==0) {
21762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int firstOrigin=canonValue&CANON_VALUE_MASK;
21772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                canonValue=(canonValue&~CANON_VALUE_MASK)|CANON_HAS_SET|canonStartSets.size();
21782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                newData.set(decompLead, canonValue);
21792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                canonStartSets.add(set=new UnicodeSet());
21802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(firstOrigin!=0) {
21812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    set.add(firstOrigin);
21822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
21832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
21842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                set=canonStartSets.get(canonValue&CANON_VALUE_MASK);
21852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
21862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            set.add(origin);
21872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
21882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
21892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
21902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    @SuppressWarnings("unused")
21912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private VersionInfo dataVersion;
21922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
21932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Code point thresholds for quick check codes.
21942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int minDecompNoCP;
21952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int minCompNoMaybeCP;
21962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
21972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Norm16 value thresholds for quick check combinations and types of extra data.
21982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int minYesNo;
21992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int minYesNoMappingsOnly;
22002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int minNoNo;
22012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int limitNoNo;
22022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int minMaybeYes;
22032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
22042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private Trie2_16 normTrie;
22052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private String maybeYesCompositions;
22062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private String extraData;  // mappings and/or compositions for yesYes, yesNo & noNo characters
22072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private byte[] smallFCD;  // [0x100] one bit per 32 BMP code points, set if any FCD!=0
22082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int[] tccc180;  // [0x180] tccc values for U+0000..U+017F
22092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
22102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private Trie2_32 canonIterData;
22112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private ArrayList<UnicodeSet> canonStartSets;
22122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
22132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // bits in canonIterData
22142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int CANON_NOT_SEGMENT_STARTER = 0x80000000;
22152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int CANON_HAS_COMPOSITIONS = 0x40000000;
22162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int CANON_HAS_SET = 0x200000;
22172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int CANON_VALUE_MASK = 0x1fffff;
22182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller}
2219