17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/*
27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
3f8a0c400bbd62a2ea4ee9b77641f79cb443d2187Neil Fuller *   Copyright (C) 2009-2015, International Business Machines
47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *   Corporation and others.  All Rights Reserved.
57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.impl;
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.IOException;
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.nio.ByteBuffer;
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.ArrayList;
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Iterator;
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UTF16;
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UnicodeSet;
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.ICUUncheckedIOException;
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.VersionInfo;
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic final class Normalizer2Impl {
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final class Hangul {
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* Korean Hangul and Jamo constants */
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static final int JAMO_L_BASE=0x1100;     /* "lead" jamo */
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static final int JAMO_L_END=0x1112;
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static final int JAMO_V_BASE=0x1161;     /* "vowel" jamo */
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static final int JAMO_V_END=0x1175;
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static final int JAMO_T_BASE=0x11a7;     /* "trail" jamo */
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static final int JAMO_T_END=0x11c2;
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static final int HANGUL_BASE=0xac00;
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static final int HANGUL_END=0xd7a3;
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static final int JAMO_L_COUNT=19;
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static final int JAMO_V_COUNT=21;
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static final int JAMO_T_COUNT=28;
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static final int JAMO_L_LIMIT=JAMO_L_BASE+JAMO_L_COUNT;
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static final int JAMO_V_LIMIT=JAMO_V_BASE+JAMO_V_COUNT;
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static final int JAMO_VT_COUNT=JAMO_V_COUNT*JAMO_T_COUNT;
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static final int HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT;
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static final int HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT;
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static boolean isHangul(int c) {
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return HANGUL_BASE<=c && c<HANGUL_LIMIT;
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static boolean isHangulWithoutJamoT(char c) {
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            c-=HANGUL_BASE;
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return c<HANGUL_COUNT && c%JAMO_T_COUNT==0;
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static boolean isJamoL(int c) {
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return JAMO_L_BASE<=c && c<JAMO_L_LIMIT;
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static boolean isJamoV(int c) {
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return JAMO_V_BASE<=c && c<JAMO_V_LIMIT;
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /**
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Decomposes c, which must be a Hangul syllable, into buffer
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * and returns the length of the decomposition (2 or 3).
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static int decompose(int c, Appendable buffer) {
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            try {
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c-=HANGUL_BASE;
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int c2=c%JAMO_T_COUNT;
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c/=JAMO_T_COUNT;
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer.append((char)(JAMO_L_BASE+c/JAMO_V_COUNT));
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer.append((char)(JAMO_V_BASE+c%JAMO_V_COUNT));
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(c2==0) {
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return 2;
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    buffer.append((char)(JAMO_T_BASE+c2));
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return 3;
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } catch(IOException e) {
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Will not occur because we do not write to I/O.
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new ICUUncheckedIOException(e);
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /**
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Decomposes c, which must be a Hangul syllable, into buffer.
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * This is the raw, not recursive, decomposition. Its length is always 2.
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static void getRawDecomposition(int c, Appendable buffer) {
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            try {
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int orig=c;
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c-=HANGUL_BASE;
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int c2=c%JAMO_T_COUNT;
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(c2==0) {
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    c/=JAMO_T_COUNT;
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    buffer.append((char)(JAMO_L_BASE+c/JAMO_V_COUNT));
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    buffer.append((char)(JAMO_V_BASE+c%JAMO_V_COUNT));
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    buffer.append((char)(orig-c2));  // LV syllable
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    buffer.append((char)(JAMO_T_BASE+c2));
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } catch(IOException e) {
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Will not occur because we do not write to I/O.
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new ICUUncheckedIOException(e);
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Writable buffer that takes care of canonical ordering.
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Its Appendable methods behave like the C++ implementation's
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * appendZeroCC() methods.
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If dest is a StringBuilder, then the buffer writes directly to it.
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Otherwise, the buffer maintains a StringBuilder for intermediate text segments
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * until no further changes are necessary and whole segments are appended.
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * append() methods that take combining-class values always write to the StringBuilder.
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Other append() methods flush and append to the Appendable.
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final class ReorderingBuffer implements Appendable {
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public ReorderingBuffer(Normalizer2Impl ni, Appendable dest, int destCapacity) {
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            impl=ni;
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            app=dest;
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(app instanceof StringBuilder) {
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                appIsStringBuilder=true;
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                str=(StringBuilder)dest;
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // In Java, the constructor subsumes public void init(int destCapacity) {
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                str.ensureCapacity(destCapacity);
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                reorderStart=0;
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(str.length()==0) {
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    lastCC=0;
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    setIterator();
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    lastCC=previousCC();
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Set reorderStart after the last code point with cc<=1 if there is one.
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(lastCC>1) {
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        while(previousCC()>1) {}
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    reorderStart=codePointLimit;
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                appIsStringBuilder=false;
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                str=new StringBuilder();
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                reorderStart=0;
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                lastCC=0;
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public boolean isEmpty() { return str.length()==0; }
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public int length() { return str.length(); }
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public int getLastCC() { return lastCC; }
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public StringBuilder getStringBuilder() { return str; }
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public boolean equals(CharSequence s, int start, int limit) {
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return UTF16Plus.equal(str, 0, str.length(), s, start, limit);
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // For Hangul composition, replacing the Leading consonant Jamo with the syllable.
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public void setLastChar(char c) {
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            str.setCharAt(str.length()-1, c);
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public void append(int c, int cc) {
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(lastCC<=cc || cc==0) {
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                str.appendCodePoint(c);
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                lastCC=cc;
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(cc<=1) {
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    reorderStart=str.length();
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                insert(c, cc);
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // s must be in NFD, otherwise change the implementation.
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public void append(CharSequence s, int start, int limit,
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                           int leadCC, int trailCC) {
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(start==limit) {
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(lastCC<=leadCC || leadCC==0) {
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(trailCC<=1) {
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    reorderStart=str.length()+(limit-start);
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(leadCC<=1) {
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    reorderStart=str.length()+1;  // Ok if not a code point boundary.
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                str.append(s, start, limit);
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                lastCC=trailCC;
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int c=Character.codePointAt(s, start);
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                start+=Character.charCount(c);
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                insert(c, leadCC);  // insert first code point
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                while(start<limit) {
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    c=Character.codePointAt(s, start);
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    start+=Character.charCount(c);
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(start<limit) {
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // s must be in NFD, otherwise we need to use getCC().
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        leadCC=getCCFromYesOrMaybe(impl.getNorm16(c));
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else {
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        leadCC=trailCC;
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    append(c, leadCC);
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // The following append() methods work like C++ appendZeroCC().
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // They assume that the cc or trailCC of their input is 0.
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Most of them implement Appendable interface methods.
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // @Override when we switch to Java 6
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public ReorderingBuffer append(char c) {
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            str.append(c);
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            lastCC=0;
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            reorderStart=str.length();
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return this;
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public void appendZeroCC(int c) {
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            str.appendCodePoint(c);
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            lastCC=0;
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            reorderStart=str.length();
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // @Override when we switch to Java 6
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public ReorderingBuffer append(CharSequence s) {
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(s.length()!=0) {
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                str.append(s);
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                lastCC=0;
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                reorderStart=str.length();
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return this;
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // @Override when we switch to Java 6
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public ReorderingBuffer append(CharSequence s, int start, int limit) {
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(start!=limit) {
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                str.append(s, start, limit);
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                lastCC=0;
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                reorderStart=str.length();
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return this;
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /**
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Flushes from the intermediate StringBuilder to the Appendable,
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * if they are different objects.
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Used after recomposition.
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Must be called at the end when writing to a non-StringBuilder Appendable.
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public void flush() {
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(appIsStringBuilder) {
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                reorderStart=str.length();
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                try {
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    app.append(str);
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    str.setLength(0);
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    reorderStart=0;
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } catch(IOException e) {
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    throw new ICUUncheckedIOException(e);  // Avoid declaring "throws IOException".
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            lastCC=0;
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /**
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Flushes from the intermediate StringBuilder to the Appendable,
2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * if they are different objects.
2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Then appends the new text to the Appendable or StringBuilder.
2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Normally used after quick check loops find a non-empty sequence.
2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public ReorderingBuffer flushAndAppendZeroCC(CharSequence s, int start, int limit) {
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(appIsStringBuilder) {
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                str.append(s, start, limit);
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                reorderStart=str.length();
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                try {
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    app.append(str).append(s, start, limit);
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    str.setLength(0);
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    reorderStart=0;
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } catch(IOException e) {
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    throw new ICUUncheckedIOException(e);  // Avoid declaring "throws IOException".
2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            lastCC=0;
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return this;
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public void remove() {
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            str.setLength(0);
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            lastCC=0;
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            reorderStart=0;
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public void removeSuffix(int suffixLength) {
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int oldLength=str.length();
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            str.delete(oldLength-suffixLength, oldLength);
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            lastCC=0;
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            reorderStart=str.length();
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /*
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * TODO: Revisit whether it makes sense to track reorderStart.
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * It is set to after the last known character with cc<=1,
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * which stops previousCC() before it reads that character and looks up its cc.
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * previousCC() is normally only called from insert().
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * In other words, reorderStart speeds up the insertion of a combining mark
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * into a multi-combining mark sequence where it does not belong at the end.
2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * This might not be worth the trouble.
2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * On the other hand, it's not a huge amount of trouble.
2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * We probably need it for UNORM_SIMPLE_APPEND.
3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Inserts c somewhere before the last character.
3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Requires 0<cc<lastCC which implies reorderStart<limit.
3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private void insert(int c, int cc) {
3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for(setIterator(), skipPrevious(); previousCC()>cc;) {}
3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // insert c at codePointLimit, after the character with prevCC<=cc
3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c<=0xffff) {
3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                str.insert(codePointLimit, (char)c);
3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(cc<=1) {
3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    reorderStart=codePointLimit+1;
3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                str.insert(codePointLimit, Character.toChars(c));
3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(cc<=1) {
3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    reorderStart=codePointLimit+2;
3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private final Normalizer2Impl impl;
3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private final Appendable app;
3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private final StringBuilder str;
3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private final boolean appIsStringBuilder;
3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private int reorderStart;
3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private int lastCC;
3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // private backward iterator
3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private void setIterator() { codePointStart=str.length(); }
3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private void skipPrevious() {  // Requires 0<codePointStart.
3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            codePointLimit=codePointStart;
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            codePointStart=str.offsetByCodePoints(codePointStart, -1);
3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private int previousCC() {  // Returns 0 if there is no previous character.
3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            codePointLimit=codePointStart;
3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(reorderStart>=codePointStart) {
3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return 0;
3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int c=str.codePointBefore(codePointStart);
3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            codePointStart-=Character.charCount(c);
3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c<MIN_CCC_LCCC_CP) {
3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return 0;
3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return getCCFromYesOrMaybe(impl.getNorm16(c));
3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private int codePointStart, codePointLimit;
3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // TODO: Propose as public API on the UTF16 class.
3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // TODO: Propose widening UTF16 methods that take char to take int.
3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // TODO: Propose widening UTF16 methods that take String to take CharSequence.
3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final class UTF16Plus {
3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /**
3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Assuming c is a surrogate code point (UTF16.isSurrogate(c)),
3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * is it a lead surrogate?
3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * @param c code unit or code point
3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * @return true or false
3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static boolean isSurrogateLead(int c) { return (c&0x400)==0; }
3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /**
3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Compares two CharSequence objects for binary equality.
3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * @param s1 first sequence
3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * @param s2 second sequence
3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * @return true if s1 contains the same text as s2
3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static boolean equal(CharSequence s1,  CharSequence s2) {
3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(s1==s2) {
3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return true;
3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int length=s1.length();
3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(length!=s2.length()) {
3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return false;
3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for(int i=0; i<length; ++i) {
3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(s1.charAt(i)!=s2.charAt(i)) {
3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return false;
3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return true;
3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /**
3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Compares two CharSequence subsequences for binary equality.
3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * @param s1 first sequence
3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * @param start1 start offset in first sequence
3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * @param limit1 limit offset in first sequence
3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * @param s2 second sequence
3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * @param start2 start offset in second sequence
3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * @param limit2 limit offset in second sequence
3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * @return true if s1.subSequence(start1, limit1) contains the same text
3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *              as s2.subSequence(start2, limit2)
3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static boolean equal(CharSequence s1, int start1, int limit1,
3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    CharSequence s2, int start2, int limit2) {
3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if((limit1-start1)!=(limit2-start2)) {
3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return false;
3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(s1==s2 && start1==start2) {
3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return true;
4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while(start1<limit1) {
4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(s1.charAt(start1++)!=s2.charAt(start2++)) {
4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return false;
4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return true;
4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public Normalizer2Impl() {}
4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final class IsAcceptable implements ICUBinary.Authenticate {
4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // @Override when we switch to Java 6
4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public boolean isDataVersionAcceptable(byte version[]) {
4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return version[0]==2;
4167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
4197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int DATA_FORMAT = 0x4e726d32;  // "Nrm2"
4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public Normalizer2Impl load(ByteBuffer bytes) {
4227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        try {
4237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            dataVersion=ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);
4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int indexesLength=bytes.getInt()/4;  // inIndexes[IX_NORM_TRIE_OFFSET]/4
4257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(indexesLength<=IX_MIN_MAYBE_YES) {
4267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new ICUUncheckedIOException("Normalizer2 data: not enough indexes");
4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int[] inIndexes=new int[indexesLength];
4297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            inIndexes[0]=indexesLength*4;
4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for(int i=1; i<indexesLength; ++i) {
4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                inIndexes[i]=bytes.getInt();
4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP];
4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP];
4367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            minYesNo=inIndexes[IX_MIN_YES_NO];
4387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            minYesNoMappingsOnly=inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            minNoNo=inIndexes[IX_MIN_NO_NO];
4407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            limitNoNo=inIndexes[IX_LIMIT_NO_NO];
4417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            minMaybeYes=inIndexes[IX_MIN_MAYBE_YES];
4427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Read the normTrie.
4447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int offset=inIndexes[IX_NORM_TRIE_OFFSET];
4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            normTrie=Trie2_16.createFromSerialized(bytes);
4477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int trieLength=normTrie.getSerializedLength();
4487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(trieLength>(nextOffset-offset)) {
4497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new ICUUncheckedIOException("Normalizer2 data: not enough bytes for normTrie");
4507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ICUBinary.skipBytes(bytes, (nextOffset-offset)-trieLength);  // skip padding after trie bytes
4527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Read the composition and mapping data.
4547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            offset=nextOffset;
4557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
4567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int numChars=(nextOffset-offset)/2;
4577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(numChars!=0) {
458f8a0c400bbd62a2ea4ee9b77641f79cb443d2187Neil Fuller                maybeYesCompositions=ICUBinary.getString(bytes, numChars, 0);
4597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                extraData=maybeYesCompositions.substring(MIN_NORMAL_MAYBE_YES-minMaybeYes);
4607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // smallFCD: new in formatVersion 2
4637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            offset=nextOffset;
4647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            smallFCD=new byte[0x100];
465f8a0c400bbd62a2ea4ee9b77641f79cb443d2187Neil Fuller            bytes.get(smallFCD);
4667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Build tccc180[].
4687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // gennorm2 enforces lccc=0 for c<MIN_CCC_LCCC_CP=U+0300.
4697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            tccc180=new int[0x180];
4707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int bits=0;
4717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for(int c=0; c<0x180; bits>>=1) {
4727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if((c&0xff)==0) {
4737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    bits=smallFCD[c>>8];  // one byte per 0x100 code points
4747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
4757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if((bits&1)!=0) {
4767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    for(int i=0; i<0x20; ++i, ++c) {
4777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        tccc180[c]=getFCD16FromNormData(c)&0xff;
4787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
4797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
4807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    c+=0x20;
4817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
4827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return this;
4857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } catch(IOException e) {
4867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new ICUUncheckedIOException(e);
4877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public Normalizer2Impl load(String name) {
4907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return load(ICUBinary.getRequiredData(name));
4917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void enumLcccRange(int start, int end, int norm16, UnicodeSet set) {
4947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(isAlgorithmicNoNo(norm16)) {
4957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Range of code points with same-norm16-value algorithmic decompositions.
4967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // They might have different non-zero FCD16 values.
4977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            do {
4987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int fcd16=getFCD16(start);
4997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(fcd16>0xff) { set.add(start); }
5007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } while(++start<=end);
5017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
5027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int fcd16=getFCD16(start);
5037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(fcd16>0xff) { set.add(start, end); }
5047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void enumNorm16PropertyStartsRange(int start, int end, int value, UnicodeSet set) {
5087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* add the start code point to the USet */
5097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        set.add(start);
5107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(start!=end && isAlgorithmicNoNo(value)) {
5117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Range of code points with same-norm16-value algorithmic decompositions.
5127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // They might have different non-zero FCD16 values.
5137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int prevFCD16=getFCD16(start);
5147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while(++start<=end) {
5157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int fcd16=getFCD16(start);
5167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(fcd16!=prevFCD16) {
5177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    set.add(start);
5187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    prevFCD16=fcd16;
5197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
5207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void addLcccChars(UnicodeSet set) {
5257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* add the start code point of each same-value range of each trie */
5267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Iterator<Trie2.Range> trieIterator=normTrie.iterator();
5277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Trie2.Range range;
5287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
5297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            enumLcccRange(range.startCodePoint, range.endCodePoint, range.value, set);
5307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void addPropertyStarts(UnicodeSet set) {
5347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* add the start code point of each same-value range of each trie */
5357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Iterator<Trie2.Range> trieIterator=normTrie.iterator();
5367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Trie2.Range range;
5377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
5387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            enumNorm16PropertyStartsRange(range.startCodePoint, range.endCodePoint, range.value, set);
5397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* add Hangul LV syllables and LV+1 because of skippables */
5427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(int c=Hangul.HANGUL_BASE; c<Hangul.HANGUL_LIMIT; c+=Hangul.JAMO_T_COUNT) {
5437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            set.add(c);
5447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            set.add(c+1);
5457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        set.add(Hangul.HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */
5477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void addCanonIterPropertyStarts(UnicodeSet set) {
5507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /* add the start code point of each same-value range of the canonical iterator data trie */
5517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ensureCanonIterData();
5527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // currently only used for the SEGMENT_STARTER property
5537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Iterator<Trie2.Range> trieIterator=canonIterData.iterator(segmentStarterMapper);
5547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Trie2.Range range;
5557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
5567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /* add the start code point to the USet */
5577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            set.add(range.startCodePoint);
5587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final Trie2.ValueMapper segmentStarterMapper=new Trie2.ValueMapper() {
5617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public int map(int in) {
5627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return in&CANON_NOT_SEGMENT_STARTER;
5637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    };
5657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // low-level properties ------------------------------------------------ ***
5677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public Trie2_16 getNormTrie() { return normTrie; }
5697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Note: Normalizer2Impl.java r30983 (2011-nov-27)
5717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // still had getFCDTrie() which built and cached an FCD trie.
5727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // That provided faster access to FCD data than getFCD16FromNormData()
5737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // but required synchronization and consumed some 10kB of heap memory
5747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // in any process that uses FCD (e.g., via collation).
5757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // tccc180[] and smallFCD[] are intended to help with any loss of performance,
5767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // at least for Latin & CJK.
5777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Builds the canonical-iterator data for this instance.
5807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This is required before any of {@link #isCanonSegmentStarter(int)} or
5817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * {@link #getCanonStartSet(int, UnicodeSet)} are called,
5827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * or else they crash.
5837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return this
5847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public synchronized Normalizer2Impl ensureCanonIterData() {
5867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(canonIterData==null) {
5877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Trie2Writable newData=new Trie2Writable(0, 0);
5887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            canonStartSets=new ArrayList<UnicodeSet>();
5897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Iterator<Trie2.Range> trieIterator=normTrie.iterator();
5907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Trie2.Range range;
5917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
5927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                final int norm16=range.value;
5937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(norm16==0 || (minYesNo<=norm16 && norm16<minNoNo)) {
5947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Inert, or 2-way mapping (including Hangul syllable).
5957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // We do not write a canonStartSet for any yesNo character.
5967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Composites from 2-way mappings are added at runtime from the
5977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // starter's compositions list, and the other characters in
5987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // 2-way mappings get CANON_NOT_SEGMENT_STARTER set because they are
5997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // "maybe" characters.
6007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    continue;
6017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                for(int c=range.startCodePoint; c<=range.endCodePoint; ++c) {
6037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    final int oldValue=newData.get(c);
6047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    int newValue=oldValue;
6057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(norm16>=minMaybeYes) {
6067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // not a segment starter if it occurs in a decomposition or has cc!=0
6077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        newValue|=CANON_NOT_SEGMENT_STARTER;
6087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if(norm16<MIN_NORMAL_MAYBE_YES) {
6097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            newValue|=CANON_HAS_COMPOSITIONS;
6107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
6117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else if(norm16<minYesNo) {
6127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        newValue|=CANON_HAS_COMPOSITIONS;
6137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else {
6147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // c has a one-way decomposition
6157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        int c2=c;
6167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        int norm16_2=norm16;
6177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        while(limitNoNo<=norm16_2 && norm16_2<minMaybeYes) {
6187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            c2=this.mapAlgorithmic(c2, norm16_2);
6197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            norm16_2=getNorm16(c2);
6207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
6217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if(minYesNo<=norm16_2 && norm16_2<limitNoNo) {
6227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            // c decomposes, get everything from the variable-length extra data
6237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            int firstUnit=extraData.charAt(norm16_2);
6247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            int length=firstUnit&MAPPING_LENGTH_MASK;
6257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
6267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                if(c==c2 && (extraData.charAt(norm16_2-1)&0xff)!=0) {
6277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    newValue|=CANON_NOT_SEGMENT_STARTER;  // original c has cc!=0
6287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                }
6297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            }
6307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            // Skip empty mappings (no characters in the decomposition).
6317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            if(length!=0) {
6327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                ++norm16_2;  // skip over the firstUnit
6337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                // add c to first code point's start set
6347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                int limit=norm16_2+length;
6357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                c2=extraData.codePointAt(norm16_2);
6367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                addToStartSet(newData, c, c2);
6377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                // Set CANON_NOT_SEGMENT_STARTER for each remaining code point of a
6387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                // one-way mapping. A 2-way mapping is possible here after
6397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                // intermediate algorithmic mapping.
6407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                if(norm16_2>=minNoNo) {
6417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    while((norm16_2+=Character.charCount(c2))<limit) {
6427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                        c2=extraData.codePointAt(norm16_2);
6437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                        int c2Value=newData.get(c2);
6447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                        if((c2Value&CANON_NOT_SEGMENT_STARTER)==0) {
6457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                            newData.set(c2, c2Value|CANON_NOT_SEGMENT_STARTER);
6467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                        }
6477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    }
6487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                }
6497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            }
6507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        } else {
6517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            // c decomposed to c2 algorithmically; c has cc==0
6527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            addToStartSet(newData, c, c2);
6537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
6547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
6557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(newValue!=oldValue) {
6567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        newData.set(c, newValue);
6577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
6587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
6607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            canonIterData=newData.toTrie2_32();
6617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return this;
6637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getNorm16(int c) { return normTrie.get(c); }
6667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getCompQuickCheck(int norm16) {
6687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(norm16<minNoNo || MIN_YES_YES_WITH_CC<=norm16) {
6697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return 1;  // yes
6707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if(minMaybeYes<=norm16) {
6717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return 2;  // maybe
6727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
6737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return 0;  // no
6747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean isAlgorithmicNoNo(int norm16) { return limitNoNo<=norm16 && norm16<minMaybeYes; }
6777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean isCompNo(int norm16) { return minNoNo<=norm16 && norm16<minMaybeYes; }
6787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean isDecompYes(int norm16) { return norm16<minYesNo || minMaybeYes<=norm16; }
6797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getCC(int norm16) {
6817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(norm16>=MIN_NORMAL_MAYBE_YES) {
6827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return norm16&0xff;
6837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(norm16<minNoNo || limitNoNo<=norm16) {
6857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return 0;
6867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getCCFromNoNo(norm16);
6887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static int getCCFromYesOrMaybe(int norm16) {
6907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return norm16>=MIN_NORMAL_MAYBE_YES ? norm16&0xff : 0;
6917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the FCD data for code point c.
6957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param c A Unicode code point.
6967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
6977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getFCD16(int c) {
6997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(c<0) {
7007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return 0;
7017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if(c<0x180) {
7027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return tccc180[c];
7037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if(c<=0xffff) {
7047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(!singleLeadMightHaveNonZeroFCD16(c)) { return 0; }
7057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return getFCD16FromNormData(c);
7077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Returns the FCD data for U+0000<=c<U+0180. */
7097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getFCD16FromBelow180(int c) { return tccc180[c]; }
7107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Returns true if the single-or-lead code unit c might have non-zero FCD data. */
7117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean singleLeadMightHaveNonZeroFCD16(int lead) {
7127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // 0<=lead<=0xffff
7137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        byte bits=smallFCD[lead>>8];
7147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(bits==0) { return false; }
7157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ((bits>>((lead>>5)&7))&1)!=0;
7167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Gets the FCD value from the regular normalization data. */
7197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getFCD16FromNormData(int c) {
7207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Only loops for 1:1 algorithmic mappings.
7217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
7227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int norm16=getNorm16(c);
7237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(norm16<=minYesNo) {
7247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // no decomposition or Hangul syllable, all zeros
7257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return 0;
7267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(norm16>=MIN_NORMAL_MAYBE_YES) {
7277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // combining mark
7287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                norm16&=0xff;
7297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return norm16|(norm16<<8);
7307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(norm16>=minMaybeYes) {
7317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return 0;
7327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(isDecompNoAlgorithmic(norm16)) {
7337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c=mapAlgorithmic(c, norm16);
7347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
7357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // c decomposes, get everything from the variable-length extra data
7367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int firstUnit=extraData.charAt(norm16);
7377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if((firstUnit&MAPPING_LENGTH_MASK)==0) {
7387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // A character that is deleted (maps to an empty string) must
7397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // get the worst-case lccc and tccc values because arbitrary
7407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // characters on both sides will become adjacent.
7417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return 0x1ff;
7427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
7437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    int fcd16=firstUnit>>8;  // tccc
7447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
7457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        fcd16|=extraData.charAt(norm16-1)&0xff00;  // lccc
7467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
7477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return fcd16;
7487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
7497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
7507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Gets the decomposition for one code point.
7557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param c code point
7567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return c's decomposition, if it has one; returns null if it does not have a decomposition
7577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public String getDecomposition(int c) {
7597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int decomp=-1;
7607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int norm16;
7617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
7627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c<minDecompNoCP || isDecompYes(norm16=getNorm16(c))) {
7637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // c does not decompose
7647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(isHangul(norm16)) {
7657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Hangul syllable: decompose algorithmically
7667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                StringBuilder buffer=new StringBuilder();
7677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Hangul.decompose(c, buffer);
7687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return buffer.toString();
7697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(isDecompNoAlgorithmic(norm16)) {
7707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                decomp=c=mapAlgorithmic(c, norm16);
7717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                continue;
7727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
7737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // c decomposes, get everything from the variable-length extra data
7747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int length=extraData.charAt(norm16++)&MAPPING_LENGTH_MASK;
7757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return extraData.substring(norm16, norm16+length);
7767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
7777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(decomp<0) {
7787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return null;
7797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
7807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return UTF16.valueOf(decomp);
7817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
7827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Gets the raw decomposition for one code point.
7877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param c code point
7887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return c's raw decomposition, if it has one; returns null if it does not have a decomposition
7897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public String getRawDecomposition(int c) {
7917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // We do not loop in this method because an algorithmic mapping itself
7927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // becomes a final result rather than having to be decomposed recursively.
7937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int norm16;
7947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(c<minDecompNoCP || isDecompYes(norm16=getNorm16(c))) {
7957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // c does not decompose
7967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return null;
7977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if(isHangul(norm16)) {
7987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Hangul syllable: decompose algorithmically
7997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            StringBuilder buffer=new StringBuilder();
8007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Hangul.getRawDecomposition(c, buffer);
8017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return buffer.toString();
8027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if(isDecompNoAlgorithmic(norm16)) {
8037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return UTF16.valueOf(mapAlgorithmic(c, norm16));
8047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
8057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // c decomposes, get everything from the variable-length extra data
8067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int firstUnit=extraData.charAt(norm16);
8077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int mLength=firstUnit&MAPPING_LENGTH_MASK;  // length of normal mapping
8087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if((firstUnit&MAPPING_HAS_RAW_MAPPING)!=0) {
8097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Read the raw mapping from before the firstUnit and before the optional ccc/lccc word.
8107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Bit 7=MAPPING_HAS_CCC_LCCC_WORD
8117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int rawMapping=norm16-((firstUnit>>7)&1)-1;
8127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                char rm0=extraData.charAt(rawMapping);
8137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(rm0<=MAPPING_LENGTH_MASK) {
8147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return extraData.substring(rawMapping-rm0, rawMapping);
8157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
8167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Copy the normal mapping and replace its first two code units with rm0.
8177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    StringBuilder buffer=new StringBuilder(mLength-1).append(rm0);
8187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    norm16+=1+2;  // skip over the firstUnit and the first two mapping code units
8197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return buffer.append(extraData, norm16, norm16+mLength-2).toString();
8207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
8217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
8227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                norm16+=1;  // skip over the firstUnit
8237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return extraData.substring(norm16, norm16+mLength);
8247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
8257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
8297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns true if code point c starts a canonical-iterator string segment.
8307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <b>{@link #ensureCanonIterData()} must have been called before this method,
8317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * or else this method will crash.</b>
8327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param c A Unicode code point.
8337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return true if c starts a canonical-iterator string segment.
8347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
8357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean isCanonSegmentStarter(int c) {
8367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return canonIterData.get(c)>=0;
8377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
8397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns true if there are characters whose decomposition starts with c.
8407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If so, then the set is cleared and then filled with those characters.
8417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <b>{@link #ensureCanonIterData()} must have been called before this method,
8427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * or else this method will crash.</b>
8437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param c A Unicode code point.
8447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param set A UnicodeSet to receive the characters whose decompositions
8457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *        start with c, if there are any.
8467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return true if there are characters whose decomposition starts with c.
8477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
8487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean getCanonStartSet(int c, UnicodeSet set) {
8497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int canonValue=canonIterData.get(c)&~CANON_NOT_SEGMENT_STARTER;
8507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(canonValue==0) {
8517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return false;
8527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        set.clear();
8547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int value=canonValue&CANON_VALUE_MASK;
8557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if((canonValue&CANON_HAS_SET)!=0) {
8567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            set.addAll(canonStartSets.get(value));
8577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if(value!=0) {
8587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            set.add(value);
8597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if((canonValue&CANON_HAS_COMPOSITIONS)!=0) {
8617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int norm16=getNorm16(c);
8627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(norm16==JAMO_L) {
8637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int syllable=Hangul.HANGUL_BASE+(c-Hangul.JAMO_L_BASE)*Hangul.JAMO_VT_COUNT;
8647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                set.add(syllable, syllable+Hangul.JAMO_VT_COUNT-1);
8657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
8667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                addComposites(getCompositionsList(norm16), set);
8677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
8687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return true;
8707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
// NOTE(review): presumably the lowest code point that can have a non-zero ccc/lccc;
// not used in this part of the file -- confirm against callers.
public static final int MIN_CCC_LCCC_CP=0x300;

// Fixed norm16 values and thresholds.
// norm16 values at or above MIN_YES_YES_WITH_CC are reported as "yes" by getCompQuickCheck().
public static final int MIN_YES_YES_WITH_CC=0xff01;
public static final int JAMO_VT=0xff00;
// At or above MIN_NORMAL_MAYBE_YES, getCC() returns the low 8 bits of norm16.
public static final int MIN_NORMAL_MAYBE_YES=0xfe00;
// norm16 value for which getCanonStartSet() adds a range of Hangul syllables.
public static final int JAMO_L=1;
// NOTE(review): presumably bounds the delta of algorithmic mappings; confirm in mapAlgorithmic().
public static final int MAX_DELTA=0x40;

// Byte offsets from the start of the data, after the generic header.
// NOTE(review): the IX_ values look like positions in the data's index array -- confirm in the loader.
public static final int IX_NORM_TRIE_OFFSET=0;
public static final int IX_EXTRA_DATA_OFFSET=1;
public static final int IX_SMALL_FCD_OFFSET=2;
public static final int IX_RESERVED3_OFFSET=3;
public static final int IX_TOTAL_SIZE=7;

// Code point thresholds for quick check codes.
public static final int IX_MIN_DECOMP_NO_CP=8;
public static final int IX_MIN_COMP_NO_MAYBE_CP=9;

// Norm16 value thresholds for quick check combinations and types of extra data.
// Mappings & compositions in [minYesNo..minYesNoMappingsOnly[.
public static final int IX_MIN_YES_NO=10;
public static final int IX_MIN_NO_NO=11;
public static final int IX_LIMIT_NO_NO=12;
public static final int IX_MIN_MAYBE_YES=13;

// Mappings only in [minYesNoMappingsOnly..minNoNo[.
public static final int IX_MIN_YES_NO_MAPPINGS_ONLY=14;

public static final int IX_COUNT=16;

// Flag bits and length field in the firstUnit of a mapping in extraData.
// Bit 7: a ccc/lccc word precedes the firstUnit; its upper byte supplies lccc.
public static final int MAPPING_HAS_CCC_LCCC_WORD=0x80;
// Bit 6: a raw mapping is stored before the firstUnit (and before the optional ccc/lccc word).
public static final int MAPPING_HAS_RAW_MAPPING=0x40;
// NOTE(review): bit 5 is not read in this part of the file; meaning taken from the name only.
public static final int MAPPING_NO_COMP_BOUNDARY_AFTER=0x20;
// Low 5 bits: number of code units in the mapping that follows the firstUnit.
public static final int MAPPING_LENGTH_MASK=0x1f;

// NOTE(review): the COMP_ constants presumably describe the encoding of composition-list
// tuples; they are not used in this part of the file -- confirm where the lists are read.
public static final int COMP_1_LAST_TUPLE=0x8000;
public static final int COMP_1_TRIPLE=1;
public static final int COMP_1_TRAIL_LIMIT=0x3400;
public static final int COMP_1_TRAIL_MASK=0x7ffe;
public static final int COMP_1_TRAIL_SHIFT=9;  // 10-1 for the "triple" bit
public static final int COMP_2_TRAIL_SHIFT=6;
public static final int COMP_2_TRAIL_MASK=0xffc0;
9157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // higher-level functionality ------------------------------------------ ***
9177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // NFD without an NFD Normalizer2 instance.
9197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public Appendable decompose(CharSequence s, StringBuilder dest) {
9207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        decompose(s, 0, s.length(), dest, s.length());
9217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return dest;
9227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
9247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Decomposes s[src, limit[ and writes the result to dest.
9257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * limit can be NULL if src is NUL-terminated.
9267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * destLengthEstimate is the initial dest buffer capacity and can be -1.
9277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
9287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void decompose(CharSequence s, int src, int limit, StringBuilder dest,
9297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                   int destLengthEstimate) {
9307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(destLengthEstimate<0) {
9317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            destLengthEstimate=limit-src;
9327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        dest.setLength(0);
9347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ReorderingBuffer buffer=new ReorderingBuffer(this, dest, destLengthEstimate);
9357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        decompose(s, src, limit, buffer);
9367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Dual functionality:
9397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // buffer!=NULL: normalize
9407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes
9417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int decompose(CharSequence s, int src, int limit,
9427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         ReorderingBuffer buffer) {
9437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int minNoCP=minDecompNoCP;
9447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int prevSrc;
9467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int c=0;
9477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int norm16=0;
9487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // only for quick check
9507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int prevBoundary=src;
9517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int prevCC=0;
9527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
9547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // count code units below the minimum or with irrelevant data for the quick check
9557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for(prevSrc=src; src!=limit;) {
9567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if( (c=s.charAt(src))<minNoCP ||
9577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    isMostDecompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
9587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ) {
9597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ++src;
9607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(!UTF16.isSurrogate((char)c)) {
9617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
9627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
9637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    char c2;
9647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(UTF16Plus.isSurrogateLead(c)) {
9657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) {
9667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            c=Character.toCodePoint((char)c, c2);
9677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
9687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else /* trail surrogate */ {
9697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) {
9707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            --src;
9717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            c=Character.toCodePoint(c2, (char)c);
9727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
9737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
9747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(isMostDecompYesAndZeroCC(norm16=getNorm16(c))) {
9757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        src+=Character.charCount(c);
9767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else {
9777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        break;
9787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
9797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
9807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
9817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // copy these code units all at once
9827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(src!=prevSrc) {
9837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(buffer!=null) {
9847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    buffer.flushAndAppendZeroCC(s, prevSrc, src);
9857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
9867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    prevCC=0;
9877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    prevBoundary=src;
9887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
9897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
9907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(src==limit) {
9917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
9927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
9937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Check one above-minimum, relevant code point.
9957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            src+=Character.charCount(c);
9967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(buffer!=null) {
9977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                decompose(c, norm16, buffer);
9987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
9997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(isDecompYes(norm16)) {
10007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    int cc=getCCFromYesOrMaybe(norm16);
10017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(prevCC<=cc || cc==0) {
10027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        prevCC=cc;
10037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if(cc<=1) {
10047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            prevBoundary=src;
10057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
10067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        continue;
10077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
10087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
10097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return prevBoundary;  // "no" or cc out of order
10107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
10117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
10127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return src;
10137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
10147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void decomposeAndAppend(CharSequence s, boolean doDecompose, ReorderingBuffer buffer) {
10157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int limit=s.length();
10167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(limit==0) {
10177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return;
10187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
10197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(doDecompose) {
10207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            decompose(s, 0, limit, buffer);
10217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return;
10227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
10237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Just merge the strings at the boundary.
10247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int c=Character.codePointAt(s, 0);
10257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int src=0;
10267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int firstCC, prevCC, cc;
10277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        firstCC=prevCC=cc=getCC(getNorm16(c));
10287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(cc!=0) {
10297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            prevCC=cc;
10307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            src+=Character.charCount(c);
10317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(src>=limit) {
10327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
10337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
10347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            c=Character.codePointAt(s, src);
10357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            cc=getCC(getNorm16(c));
10367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        };
10377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        buffer.append(s, 0, src, firstCC, prevCC);
10387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        buffer.append(s, src, limit);
10397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
10407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Very similar to composeQuickCheck(): Make the same changes in both places if relevant.
10417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // doCompose: normalize
10427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // !doCompose: isNormalized (buffer must be empty and initialized)
10437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean compose(CharSequence s, int src, int limit,
10447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                           boolean onlyContiguous,
10457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                           boolean doCompose,
10467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                           ReorderingBuffer buffer) {
10477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int minNoMaybeCP=minCompNoMaybeCP;
10487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /*
10507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * prevBoundary points to the last character before the current one
10517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * that has a composition boundary before it with ccc==0 and quick check "yes".
10527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Keeping track of prevBoundary saves us looking for a composition boundary
10537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * when we find a "no" or "maybe".
10547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         *
10557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * When we back out from prevSrc back to prevBoundary,
10567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * then we also remove those same characters (which had been simply copied
10577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * or canonically-order-inserted) from the ReorderingBuffer.
10587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Therefore, at all times, the [prevBoundary..prevSrc[ source units
10597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * must correspond 1:1 to destination units at the end of the destination buffer.
10607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
10617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int prevBoundary=src;
10627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int prevSrc;
10637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int c=0;
10647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int norm16=0;
10657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // only for isNormalized
10677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int prevCC=0;
10687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
10707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // count code units below the minimum or with irrelevant data for the quick check
10717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for(prevSrc=src; src!=limit;) {
10727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if( (c=s.charAt(src))<minNoMaybeCP ||
10737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    isCompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
10747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ) {
10757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ++src;
10767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(!UTF16.isSurrogate((char)c)) {
10777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
10787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
10797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    char c2;
10807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(UTF16Plus.isSurrogateLead(c)) {
10817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) {
10827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            c=Character.toCodePoint((char)c, c2);
10837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
10847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else /* trail surrogate */ {
10857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) {
10867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            --src;
10877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            c=Character.toCodePoint(c2, (char)c);
10887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
10897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
10907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(isCompYesAndZeroCC(norm16=getNorm16(c))) {
10917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        src+=Character.charCount(c);
10927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else {
10937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        break;
10947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
10957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
10967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
10977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // copy these code units all at once
10987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(src!=prevSrc) {
10997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(src==limit) {
11007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(doCompose) {
11017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        buffer.flushAndAppendZeroCC(s, prevSrc, src);
11027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
11037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
11047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
11057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Set prevBoundary to the last character in the quick check loop.
11067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                prevBoundary=src-1;
11077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if( Character.isLowSurrogate(s.charAt(prevBoundary)) && prevSrc<prevBoundary &&
11087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    Character.isHighSurrogate(s.charAt(prevBoundary-1))
11097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ) {
11107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    --prevBoundary;
11117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
11127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(doCompose) {
11137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // The last "quick check yes" character is excluded from the
11147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // flush-and-append call in case it needs to be modified.
11157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    buffer.flushAndAppendZeroCC(s, prevSrc, prevBoundary);
11167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    buffer.append(s, prevBoundary, src);
11177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
11187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    prevCC=0;
11197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
11207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The start of the current character (c).
11217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                prevSrc=src;
11227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(src==limit) {
11237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
11247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
11257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            src+=Character.charCount(c);
11277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /*
11287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
11297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward)
11307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * or has ccc!=0.
11317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * Check for Jamo V/T, then for regular characters.
11327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * c is not a Hangul syllable or Jamo L because those have "yes" properties.
11337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             */
11347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(isJamoVT(norm16) && prevBoundary!=prevSrc) {
11357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                char prev=s.charAt(prevSrc-1);
11367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                boolean needToDecompose=false;
11377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(c<Hangul.JAMO_T_BASE) {
11387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
11397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    prev-=Hangul.JAMO_L_BASE;
11407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(prev<Hangul.JAMO_L_COUNT) {
11417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if(!doCompose) {
11427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            return false;
11437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
11447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        char syllable=(char)
11457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            (Hangul.HANGUL_BASE+
11467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                             (prev*Hangul.JAMO_V_COUNT+(c-Hangul.JAMO_V_BASE))*
11477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                             Hangul.JAMO_T_COUNT);
11487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        char t;
11497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if(src!=limit && (t=(char)(s.charAt(src)-Hangul.JAMO_T_BASE))<Hangul.JAMO_T_COUNT) {
11507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            ++src;
11517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            syllable+=t;  // The next character was a Jamo T.
11527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            prevBoundary=src;
11537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            buffer.setLastChar(syllable);
11547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            continue;
11557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
11567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // If we see L+V+x where x!=T then we drop to the slow path,
11577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // decompose and recompose.
11587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // This is to deal with NFKC finding normal L and V but a
11597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // compatibility variant of a T. We need to either fully compose that
11607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // combination here (which would complicate the code and may not work
11617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // with strange custom data) or use the slow path -- or else our replacing
11627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // two input characters (L+V) with one output character (LV syllable)
11637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // would violate the invariant that [prevBoundary..prevSrc[ has the same
11647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // length as what we appended to the buffer since prevBoundary.
11657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        needToDecompose=true;
11667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
11677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(Hangul.isHangulWithoutJamoT(prev)) {
11687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // c is a Jamo Trailing consonant,
11697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // compose with previous Hangul LV that does not contain a Jamo T.
11707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(!doCompose) {
11717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return false;
11727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
11737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    buffer.setLastChar((char)(prev+c-Hangul.JAMO_T_BASE));
11747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    prevBoundary=src;
11757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    continue;
11767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
11777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(!needToDecompose) {
11787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // The Jamo V/T did not compose into a Hangul syllable.
11797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(doCompose) {
11807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        buffer.append((char)c);
11817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else {
11827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        prevCC=0;
11837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
11847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    continue;
11857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
11867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
11877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /*
11887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * Source buffer pointers:
11897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             *
11907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             *  all done      quick check   current char  not yet
11917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             *                "yes" but     (c)           processed
11927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             *                may combine
11937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             *                forward
11947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * [-------------[-------------[-------------[-------------[
11957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * |             |             |             |             |
11967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * orig. src     prevBoundary  prevSrc       src           limit
11977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             *
11987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             *
11997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * Destination buffer pointers inside the ReorderingBuffer:
12007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             *
12017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             *  all done      might take    not filled yet
12027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             *                characters for
12037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             *                reordering
12047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * [-------------[-------------[-------------[
12057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * |             |             |             |
12067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * start         reorderStart  limit         |
12077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             *                             +remainingCap.+
12087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             */
12097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(norm16>=MIN_YES_YES_WITH_CC) {
12107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int cc=norm16&0xff;  // cc!=0
12117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if( onlyContiguous &&  // FCC
12127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    (doCompose ? buffer.getLastCC() : prevCC)==0 &&
12137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    prevBoundary<prevSrc &&
12147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // buffer.getLastCC()==0 && prevBoundary<prevSrc tell us that
12157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // [prevBoundary..prevSrc[ (which is exactly one character under these conditions)
12167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // passed the quick check "yes && ccc==0" test.
12177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Check whether the last character was a "yesYes" or a "yesNo".
12187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // If a "yesNo", then we get its trailing ccc from its
12197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // mapping and check for canonical order.
12207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // All other cases are ok.
12217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    getTrailCCFromCompYesAndZeroCC(s, prevBoundary, prevSrc)>cc
12227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ) {
12237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Fails FCD test, need to decompose and contiguously recompose.
12247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(!doCompose) {
12257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return false;
12267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
12277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(doCompose) {
12287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    buffer.append(c, cc);
12297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    continue;
12307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(prevCC<=cc) {
12317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    prevCC=cc;
12327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    continue;
12337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
12347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return false;
12357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
12367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(!doCompose && !isMaybeOrNonZeroCC(norm16)) {
12377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return false;
12387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
12397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /*
12417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * Find appropriate boundaries around this character,
12427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * decompose the source text from between the boundaries,
12437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * and recompose it.
12447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             *
12457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * We may need to remove the last few characters from the ReorderingBuffer
12467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * to account for source text that was copied or appended
12477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * but needs to take part in the recomposition.
12487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             */
12497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /*
12517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * Find the last composition boundary in [prevBoundary..src[.
12527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * It is either the decomposition of the current character (at prevSrc),
12537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * or prevBoundary.
12547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             */
12557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(hasCompBoundaryBefore(c, norm16)) {
12567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                prevBoundary=prevSrc;
12577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(doCompose) {
12587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer.removeSuffix(prevSrc-prevBoundary);
12597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
12607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Find the next composition boundary in [src..limit[ -
12627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // modifies src to point to the next starter.
12637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            src=findNextCompBoundary(s, src, limit);
12647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Decompose [prevBoundary..src[ into the buffer and then recompose that part of it.
12667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int recomposeStartIndex=buffer.length();
12677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            decomposeShort(s, prevBoundary, src, buffer);
12687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            recompose(buffer, recomposeStartIndex, onlyContiguous);
12697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(!doCompose) {
12707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(!buffer.equals(s, prevBoundary, src)) {
12717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return false;
12727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
12737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer.remove();
12747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                prevCC=0;
12757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
12767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Move to the next starter. We never need to look back before this point again.
12787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            prevBoundary=src;
12797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
12807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return true;
12817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
12827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
12837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Very similar to compose(): Make the same changes in both places if relevant.
12847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * doSpan: spanQuickCheckYes (ignore bit 0 of the return value)
12857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * !doSpan: quickCheck
12867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return bits 31..1: spanQuickCheckYes (==s.length() if "yes") and
12877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *         bit 0: set if "maybe"; otherwise, if the span length&lt;s.length()
12887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *         then the quick check result is "no"
12897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
12907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int composeQuickCheck(CharSequence s, int src, int limit,
12917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                 boolean onlyContiguous, boolean doSpan) {
12927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int qcResult=0;
12937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int minNoMaybeCP=minCompNoMaybeCP;
12947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /*
12967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * prevBoundary points to the last character before the current one
12977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * that has a composition boundary before it with ccc==0 and quick check "yes".
12987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
12997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int prevBoundary=src;
13007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int prevSrc;
13017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int c=0;
13027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int norm16=0;
13037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int prevCC=0;
13047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
13067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // count code units below the minimum or with irrelevant data for the quick check
13077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for(prevSrc=src;;) {
13087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(src==limit) {
13097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return (src<<1)|qcResult;  // "yes" or "maybe"
13107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
13117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if( (c=s.charAt(src))<minNoMaybeCP ||
13127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    isCompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
13137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ) {
13147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ++src;
13157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(!UTF16.isSurrogate((char)c)) {
13167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
13177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
13187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    char c2;
13197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(UTF16Plus.isSurrogateLead(c)) {
13207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) {
13217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            c=Character.toCodePoint((char)c, c2);
13227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
13237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else /* trail surrogate */ {
13247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) {
13257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            --src;
13267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            c=Character.toCodePoint(c2, (char)c);
13277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
13287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
13297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(isCompYesAndZeroCC(norm16=getNorm16(c))) {
13307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        src+=Character.charCount(c);
13317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else {
13327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        break;
13337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
13347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
13357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
13367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(src!=prevSrc) {
13377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Set prevBoundary to the last character in the quick check loop.
13387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                prevBoundary=src-1;
13397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if( Character.isLowSurrogate(s.charAt(prevBoundary)) && prevSrc<prevBoundary &&
13407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        Character.isHighSurrogate(s.charAt(prevBoundary-1))
13417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ) {
13427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    --prevBoundary;
13437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
13447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                prevCC=0;
13457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The start of the current character (c).
13467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                prevSrc=src;
13477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
13487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            src+=Character.charCount(c);
13507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /*
13517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
13527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * c is either a "noNo" (has a mapping) or a "maybeYes" (combines backward)
13537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * or has ccc!=0.
13547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             */
13557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(isMaybeOrNonZeroCC(norm16)) {
13567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int cc=getCCFromYesOrMaybe(norm16);
13577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if( onlyContiguous &&  // FCC
13587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    cc!=0 &&
13597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    prevCC==0 &&
13607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    prevBoundary<prevSrc &&
13617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // prevCC==0 && prevBoundary<prevSrc tell us that
13627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // [prevBoundary..prevSrc[ (which is exactly one character under these conditions)
13637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // passed the quick check "yes && ccc==0" test.
13647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Check whether the last character was a "yesYes" or a "yesNo".
13657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // If a "yesNo", then we get its trailing ccc from its
13667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // mapping and check for canonical order.
13677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // All other cases are ok.
13687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    getTrailCCFromCompYesAndZeroCC(s, prevBoundary, prevSrc)>cc
13697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ) {
13707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Fails FCD test.
13717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(prevCC<=cc || cc==0) {
13727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    prevCC=cc;
13737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(norm16<MIN_YES_YES_WITH_CC) {
13747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if(!doSpan) {
13757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            qcResult=1;
13767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        } else {
13777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            return prevBoundary<<1;  // spanYes does not care to know it's "maybe"
13787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
13797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
13807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    continue;
13817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
13827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
13837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return prevBoundary<<1;  // "no"
13847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
13857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
13867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void composeAndAppend(CharSequence s,
13877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                 boolean doCompose,
13887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                 boolean onlyContiguous,
13897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                 ReorderingBuffer buffer) {
13907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int src=0, limit=s.length();
13917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(!buffer.isEmpty()) {
13927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int firstStarterInSrc=findNextCompBoundary(s, 0, limit);
13937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(0!=firstStarterInSrc) {
13947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int lastStarterInDest=findPreviousCompBoundary(buffer.getStringBuilder(),
13957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                                               buffer.length());
13967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                StringBuilder middle=new StringBuilder((buffer.length()-lastStarterInDest)+
13977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                                       firstStarterInSrc+16);
13987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                middle.append(buffer.getStringBuilder(), lastStarterInDest, buffer.length());
13997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer.removeSuffix(buffer.length()-lastStarterInDest);
14007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                middle.append(s, 0, firstStarterInSrc);
14017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                compose(middle, 0, middle.length(), onlyContiguous, true, buffer);
14027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                src=firstStarterInSrc;
14037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
14047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
14057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(doCompose) {
14067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            compose(s, src, limit, onlyContiguous, true, buffer);
14077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
14087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            buffer.append(s, src, limit);
14097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
14107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Dual functionality:
14127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // buffer!=NULL: normalize
14137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes
14147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int makeFCD(CharSequence s, int src, int limit, ReorderingBuffer buffer) {
14157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Note: In this function we use buffer->appendZeroCC() because we track
14167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // the lead and trail combining classes here, rather than leaving it to
14177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // the ReorderingBuffer.
14187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // The exception is the call to decomposeShort() which uses the buffer
14197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // in the normal way.
14207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1.
14227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Similar to the prevBoundary in the compose() implementation.
14237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int prevBoundary=src;
14247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int prevSrc;
14257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int c=0;
14267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int prevFCD16=0;
14277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int fcd16=0;
14287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
14307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // count code units with lccc==0
14317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for(prevSrc=src; src!=limit;) {
14327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if((c=s.charAt(src))<MIN_CCC_LCCC_CP) {
14337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    prevFCD16=~c;
14347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ++src;
14357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(!singleLeadMightHaveNonZeroFCD16(c)) {
14367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    prevFCD16=0;
14377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ++src;
14387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
14397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(UTF16.isSurrogate((char)c)) {
14407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        char c2;
14417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if(UTF16Plus.isSurrogateLead(c)) {
14427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) {
14437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                c=Character.toCodePoint((char)c, c2);
14447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            }
14457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        } else /* trail surrogate */ {
14467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) {
14477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                --src;
14487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                c=Character.toCodePoint(c2, (char)c);
14497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            }
14507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
14517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
14527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if((fcd16=getFCD16FromNormData(c))<=0xff) {
14537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        prevFCD16=fcd16;
14547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        src+=Character.charCount(c);
14557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else {
14567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        break;
14577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
14587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
14597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
14607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // copy these code units all at once
14617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(src!=prevSrc) {
14627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(src==limit) {
14637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(buffer!=null) {
14647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        buffer.flushAndAppendZeroCC(s, prevSrc, src);
14657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
14667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
14677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
14687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                prevBoundary=src;
14697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // We know that the previous character's lccc==0.
14707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(prevFCD16<0) {
14717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Fetching the fcd16 value was deferred for this below-U+0300 code point.
14727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    int prev=~prevFCD16;
14737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    prevFCD16= prev<0x180 ? tccc180[prev] : getFCD16FromNormData(prev);
14747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(prevFCD16>1) {
14757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        --prevBoundary;
14767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
14777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
14787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    int p=src-1;
14797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if( Character.isLowSurrogate(s.charAt(p)) && prevSrc<p &&
14807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        Character.isHighSurrogate(s.charAt(p-1))
14817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ) {
14827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        --p;
14837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // Need to fetch the previous character's FCD value because
14847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // prevFCD16 was just for the trail surrogate code point.
14857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        prevFCD16=getFCD16FromNormData(Character.toCodePoint(s.charAt(p), s.charAt(p+1)));
14867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // Still known to have lccc==0 because its lead surrogate unit had lccc==0.
14877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
14887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(prevFCD16>1) {
14897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        prevBoundary=p;
14907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
14917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
14927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(buffer!=null) {
14937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // The last lccc==0 character is excluded from the
14947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // flush-and-append call in case it needs to be modified.
14957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    buffer.flushAndAppendZeroCC(s, prevSrc, prevBoundary);
14967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    buffer.append(s, prevBoundary, src);
14977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
14987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The start of the current character (c).
14997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                prevSrc=src;
15007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(src==limit) {
15017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
15027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
15037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            src+=Character.charCount(c);
15057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // The current character (c) at [prevSrc..src[ has a non-zero lead combining class.
15067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Check for proper order, and decompose locally if necessary.
15077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if((prevFCD16&0xff)<=(fcd16>>8)) {
15087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // proper order: prev tccc <= current lccc
15097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if((fcd16&0xff)<=1) {
15107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    prevBoundary=src;
15117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
15127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(buffer!=null) {
15137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    buffer.appendZeroCC(c);
15147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
15157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                prevFCD16=fcd16;
15167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                continue;
15177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(buffer==null) {
15187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return prevBoundary;  // quick check "no"
15197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
15207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /*
15217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 * Back out the part of the source that we copied or appended
15227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 * already but is now going to be decomposed.
15237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 * prevSrc is set to after what was copied/appended.
15247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 */
15257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer.removeSuffix(prevSrc-prevBoundary);
15267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /*
15277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 * Find the part of the source that needs to be decomposed,
15287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 * up to the next safe boundary.
15297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 */
15307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                src=findNextFCDBoundary(s, src, limit);
15317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                /*
15327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 * The source text does not fulfill the conditions for FCD.
15337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 * Decompose and reorder a limited piece of the text.
15347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                 */
15357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                decomposeShort(s, prevBoundary, src, buffer);
15367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                prevBoundary=src;
15377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                prevFCD16=0;
15387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
15397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
15407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return src;
15417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
15427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void makeFCDAndAppend(CharSequence s, boolean doMakeFCD, ReorderingBuffer buffer) {
15437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int src=0, limit=s.length();
15447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(!buffer.isEmpty()) {
15457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int firstBoundaryInSrc=findNextFCDBoundary(s, 0, limit);
15467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(0!=firstBoundaryInSrc) {
15477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int lastBoundaryInDest=findPreviousFCDBoundary(buffer.getStringBuilder(),
15487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                                               buffer.length());
15497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                StringBuilder middle=new StringBuilder((buffer.length()-lastBoundaryInDest)+
15507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                                       firstBoundaryInSrc+16);
15517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                middle.append(buffer.getStringBuilder(), lastBoundaryInDest, buffer.length());
15527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer.removeSuffix(buffer.length()-lastBoundaryInDest);
15537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                middle.append(s, 0, firstBoundaryInSrc);
15547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                makeFCD(middle, 0, middle.length(), buffer);
15557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                src=firstBoundaryInSrc;
15567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
15577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
15587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(doMakeFCD) {
15597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            makeFCD(s, src, limit, buffer);
15607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
15617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            buffer.append(s, src, limit);
15627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
15637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
15647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Note: hasDecompBoundary() could be implemented as aliases to
15667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // hasFCDBoundaryBefore() and hasFCDBoundaryAfter()
15677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // at the cost of building the FCD trie for a decomposition normalizer.
15687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean hasDecompBoundary(int c, boolean before) {
15697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
15707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c<minDecompNoCP) {
15717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return true;
15727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
15737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int norm16=getNorm16(c);
15747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(isHangul(norm16) || isDecompYesAndZeroCC(norm16)) {
15757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return true;
15767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(norm16>MIN_NORMAL_MAYBE_YES) {
15777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return false;  // ccc!=0
15787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(isDecompNoAlgorithmic(norm16)) {
15797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c=mapAlgorithmic(c, norm16);
15807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
15817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // c decomposes, get everything from the variable-length extra data
15827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int firstUnit=extraData.charAt(norm16);
15837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if((firstUnit&MAPPING_LENGTH_MASK)==0) {
15847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return false;
15857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
15867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(!before) {
15877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // decomp after-boundary: same as hasFCDBoundaryAfter(),
15887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // fcd16<=1 || trailCC==0
15897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(firstUnit>0x1ff) {
15907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return false;  // trailCC>1
15917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
15927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(firstUnit<=0xff) {
15937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return true;  // trailCC==0
15947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
15957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // if(trailCC==1) test leadCC==0, same as checking for before-boundary
15967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
15977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // true if leadCC==0 (hasFCDBoundaryBefore())
15987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (extraData.charAt(norm16-1)&0xff00)==0;
15997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
16007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
16017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
16027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean isDecompInert(int c) { return isDecompYesAndZeroCC(getNorm16(c)); }
16037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean hasCompBoundaryBefore(int c) {
16057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return c<minCompNoMaybeCP || hasCompBoundaryBefore(c, getNorm16(c));
16067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
16077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean hasCompBoundaryAfter(int c, boolean onlyContiguous, boolean testInert) {
16087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
16097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int norm16=getNorm16(c);
16107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(isInert(norm16)) {
16117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return true;
16127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(norm16<=minYesNo) {
16137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Hangul: norm16==minYesNo
16147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Hangul LVT has a boundary after it.
16157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Hangul LV and non-inert yesYes characters combine forward.
16167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return isHangul(norm16) && !Hangul.isHangulWithoutJamoT((char)c);
16177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(norm16>= (testInert ? minNoNo : minMaybeYes)) {
16187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return false;
16197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(isDecompNoAlgorithmic(norm16)) {
16207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c=mapAlgorithmic(c, norm16);
16217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
16227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // c decomposes, get everything from the variable-length extra data.
16237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // If testInert, then c must be a yesNo character which has lccc=0,
16247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // otherwise it could be a noNo.
16257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int firstUnit=extraData.charAt(norm16);
16267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // true if
16277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //   not MAPPING_NO_COMP_BOUNDARY_AFTER
16287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //     (which is set if
16297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //       c is not deleted, and
16307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //       it and its decomposition do not combine forward, and it has a starter)
16317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //   and if FCC then trailCC<=1
16327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return
16337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    (firstUnit&MAPPING_NO_COMP_BOUNDARY_AFTER)==0 &&
16347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    (!onlyContiguous || firstUnit<=0x1ff);
16357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
16367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
16377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
16387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean hasFCDBoundaryBefore(int c) { return c<MIN_CCC_LCCC_CP || getFCD16(c)<=0xff; }
16407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean hasFCDBoundaryAfter(int c) {
16417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int fcd16=getFCD16(c);
16427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return fcd16<=1 || (fcd16&0xff)==0;
16437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
16447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean isFCDInert(int c) { return getFCD16(c)<=1; }
16457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean isMaybe(int norm16) { return minMaybeYes<=norm16 && norm16<=JAMO_VT; }
16477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean isMaybeOrNonZeroCC(int norm16) { return norm16>=minMaybeYes; }
16487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static boolean isInert(int norm16) { return norm16==0; }
16497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static boolean isJamoL(int norm16) { return norm16==1; }
16507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static boolean isJamoVT(int norm16) { return norm16==JAMO_VT; }
16517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean isHangul(int norm16) { return norm16==minYesNo; }
16527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean isCompYesAndZeroCC(int norm16) { return norm16<minNoNo; }
16537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // UBool isCompYes(uint16_t norm16) const {
16547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //     return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo;
16557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // }
16567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // UBool isCompYesOrMaybe(uint16_t norm16) const {
16577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //     return norm16<minNoNo || minMaybeYes<=norm16;
16587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // }
16597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // private boolean hasZeroCCFromDecompYes(int norm16) {
16607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //     return norm16<=MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
16617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // }
16627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean isDecompYesAndZeroCC(int norm16) {
16637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return norm16<minYesNo ||
16647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert               norm16==JAMO_VT ||
16657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert               (minMaybeYes<=norm16 && norm16<=MIN_NORMAL_MAYBE_YES);
16667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
16677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
16687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * A little faster and simpler than isDecompYesAndZeroCC() but does not include
16697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the MaybeYes which combine-forward and have ccc=0.
16707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * (Standard Unicode 5.2 normalization does not have such characters.)
16717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
16727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean isMostDecompYesAndZeroCC(int norm16) {
16737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return norm16<minYesNo || norm16==MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
16747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
16757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean isDecompNoAlgorithmic(int norm16) { return norm16>=limitNoNo; }
16767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
16777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // For use with isCompYes().
16787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC.
16797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // static uint8_t getCCFromYes(uint16_t norm16) {
16807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //     return norm16>=MIN_YES_YES_WITH_CC ? (uint8_t)norm16 : 0;
16817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // }
16827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int getCCFromNoNo(int norm16) {
16837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if((extraData.charAt(norm16)&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
16847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return extraData.charAt(norm16-1)&0xff;
16857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
16867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return 0;
16877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
16887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
16897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // requires that the [cpStart..cpLimit[ character passes isCompYesAndZeroCC()
16907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    int getTrailCCFromCompYesAndZeroCC(CharSequence s, int cpStart, int cpLimit) {
16917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int c;
16927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(cpStart==(cpLimit-1)) {
16937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            c=s.charAt(cpStart);
16947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
16957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            c=Character.codePointAt(s, cpStart);
16967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
16977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int prevNorm16=getNorm16(c);
16987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(prevNorm16<=minYesNo) {
16997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return 0;  // yesYes and Hangul LV/LVT have ccc=tccc=0
17007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
17017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return extraData.charAt(prevNorm16)>>8;  // tccc from yesNo
17027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
17037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
17047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Requires algorithmic-NoNo.
17067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int mapAlgorithmic(int c, int norm16) {
17077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return c+norm16-(minMaybeYes-MAX_DELTA-1);
17087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
17097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Requires minYesNo<norm16<limitNoNo.
17117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // private int getMapping(int norm16) { return /*extraData+*/norm16; }
17127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
17147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return index into maybeYesCompositions, or -1
17157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
17167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int getCompositionsListForDecompYes(int norm16) {
17177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(norm16==0 || MIN_NORMAL_MAYBE_YES<=norm16) {
17187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return -1;
17197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
17207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if((norm16-=minMaybeYes)<0) {
17217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // norm16<minMaybeYes: index into extraData which is a substring at
17227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                //     maybeYesCompositions[MIN_NORMAL_MAYBE_YES-minMaybeYes]
17237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // same as (MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16
17247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                norm16+=MIN_NORMAL_MAYBE_YES;  // for yesYes; if Jamo L: harmless empty list
17257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
17267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return norm16;
17277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
17287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
17297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
17307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return index into maybeYesCompositions
17317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
17327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int getCompositionsListForComposite(int norm16) {
17337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // composite has both mapping & compositions list
17347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int firstUnit=extraData.charAt(norm16);
17357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16+  // mapping in maybeYesCompositions
17367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            1+  // +1 to skip the first unit with the mapping lenth
17377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            (firstUnit&MAPPING_LENGTH_MASK);  // + mapping length
17387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
17397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
17407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param c code point must have compositions
17417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return index into maybeYesCompositions
17427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
17437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int getCompositionsList(int norm16) {
17447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return isDecompYes(norm16) ?
17457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                getCompositionsListForDecompYes(norm16) :
17467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                getCompositionsListForComposite(norm16);
17477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
17487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Decompose a short piece of text which is likely to contain characters that
17507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // fail the quick check loop and/or where the quick check loop's overhead
17517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // is unlikely to be amortized.
17527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Called by the compose() and makeFCD() implementations.
17537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Public in Java for collation implementation code.
17547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void decomposeShort(CharSequence s, int src, int limit,
17557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                               ReorderingBuffer buffer) {
17567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(src<limit) {
17577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int c=Character.codePointAt(s, src);
17587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            src+=Character.charCount(c);
17597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            decompose(c, getNorm16(c), buffer);
17607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
17617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
17627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void decompose(int c, int norm16,
17637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                           ReorderingBuffer buffer) {
17647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Only loops for 1:1 algorithmic mappings.
17657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
17667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // get the decomposition and the lead and trail cc's
17677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(isDecompYes(norm16)) {
17687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // c does not decompose
17697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer.append(c, getCCFromYesOrMaybe(norm16));
17707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(isHangul(norm16)) {
17717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Hangul syllable: decompose algorithmically
17727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Hangul.decompose(c, buffer);
17737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(isDecompNoAlgorithmic(norm16)) {
17747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c=mapAlgorithmic(c, norm16);
17757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                norm16=getNorm16(c);
17767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                continue;
17777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
17787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // c decomposes, get everything from the variable-length extra data
17797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int firstUnit=extraData.charAt(norm16);
17807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int length=firstUnit&MAPPING_LENGTH_MASK;
17817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int leadCC, trailCC;
17827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                trailCC=firstUnit>>8;
17837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
17847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    leadCC=extraData.charAt(norm16-1)>>8;
17857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
17867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    leadCC=0;
17877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
17887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++norm16;  // skip over the firstUnit
17897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer.append(extraData, norm16, norm16+length, leadCC, trailCC);
17907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
17917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return;
17927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
17937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
17947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
17957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
17967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Finds the recomposition result for
17977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * a forward-combining "lead" character,
17987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * specified with a pointer to its compositions list,
17997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * and a backward-combining "trail" character.
18007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
18017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>If the lead and trail characters combine, then this function returns
18027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the following "compositeAndFwd" value:
18037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <pre>
18047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 21..1  composite character
18057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bit      0  set if the composite is a forward-combining starter
18067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * </pre>
18077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * otherwise it returns -1.
18087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
18097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>The compositions list has (trail, compositeAndFwd) pair entries,
18107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * encoded as either pairs or triples of 16-bit units.
18117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The last entry has the high bit of its first unit set.
18127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
18137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>The list is sorted by ascending trail characters (there are no duplicates).
18147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * A linear search is used.
18157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
18167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <p>See normalizer2impl.h for a more detailed description
18177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * of the compositions list format.
18187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
18197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int combine(String compositions, int list, int trail) {
18207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int key1, firstUnit;
18217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(trail<COMP_1_TRAIL_LIMIT) {
18227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // trail character is 0..33FF
18237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // result entry may have 2 or 3 units
18247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            key1=(trail<<1);
18257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while(key1>(firstUnit=compositions.charAt(list))) {
18267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                list+=2+(firstUnit&COMP_1_TRIPLE);
18277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
18287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
18297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if((firstUnit&COMP_1_TRIPLE)!=0) {
18307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return ((int)compositions.charAt(list+1)<<16)|compositions.charAt(list+2);
18317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
18327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return compositions.charAt(list+1);
18337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
18347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
18357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
18367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // trail character is 3400..10FFFF
18377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // result entry has 3 units
18387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            key1=COMP_1_TRAIL_LIMIT+(((trail>>COMP_1_TRAIL_SHIFT))&~COMP_1_TRIPLE);
18397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int key2=(trail<<COMP_2_TRAIL_SHIFT)&0xffff;
18407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int secondUnit;
18417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for(;;) {
18427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(key1>(firstUnit=compositions.charAt(list))) {
18437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    list+=2+(firstUnit&COMP_1_TRIPLE);
18447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
18457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(key2>(secondUnit=compositions.charAt(list+1))) {
18467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if((firstUnit&COMP_1_LAST_TUPLE)!=0) {
18477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            break;
18487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        } else {
18497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            list+=3;
18507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
18517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else if(key2==(secondUnit&COMP_2_TRAIL_MASK)) {
18527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return ((secondUnit&~COMP_2_TRAIL_MASK)<<16)|compositions.charAt(list+2);
18537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else {
18547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        break;
18557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
18567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
18577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
18587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
18597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
18607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
18617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return -1;
18627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
18637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
18647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param list some character's compositions list
18657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param set recursively receives the composites from these compositions
18667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
18677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void addComposites(int list, UnicodeSet set) {
18687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int firstUnit, compositeAndFwd;
18697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        do {
18707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            firstUnit=maybeYesCompositions.charAt(list);
18717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if((firstUnit&COMP_1_TRIPLE)==0) {
18727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                compositeAndFwd=maybeYesCompositions.charAt(list+1);
18737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                list+=2;
18747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
18757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                compositeAndFwd=(((int)maybeYesCompositions.charAt(list+1)&~COMP_2_TRAIL_MASK)<<16)|
18767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                maybeYesCompositions.charAt(list+2);
18777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                list+=3;
18787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
18797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int composite=compositeAndFwd>>1;
18807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if((compositeAndFwd&1)!=0) {
18817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                addComposites(getCompositionsListForComposite(getNorm16(composite)), set);
18827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
18837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            set.add(composite);
18847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } while((firstUnit&COMP_1_LAST_TUPLE)==0);
18857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
18867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /*
18877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Recomposes the buffer text starting at recomposeStartIndex
18887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * (which is in NFD - decomposed and canonically ordered),
18897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * and truncates the buffer contents.
18907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
18917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Note that recomposition never lengthens the text:
18927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Any character consists of either one or two code units;
18937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * a composition may contain at most one more code unit than the original starter,
18947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * while the combining mark that is removed has at least one code unit.
18957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
18967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void recompose(ReorderingBuffer buffer, int recomposeStartIndex,
18977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                           boolean onlyContiguous) {
18987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuilder sb=buffer.getStringBuilder();
18997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int p=recomposeStartIndex;
19007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(p==sb.length()) {
19017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return;
19027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
19037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int starter, pRemove;
19057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int compositionsList;
19067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int c, compositeAndFwd;
19077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int norm16;
19087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int cc, prevCC;
19097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean starterIsSupplementary;
19107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Some of the following variables are not used until we have a forward-combining starter
19127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // and are only initialized now to avoid compiler warnings.
19137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        compositionsList=-1;  // used as indicator for whether we have a forward-combining starter
19147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        starter=-1;
19157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        starterIsSupplementary=false;
19167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        prevCC=0;
19177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
19197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            c=sb.codePointAt(p);
19207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            p+=Character.charCount(c);
19217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            norm16=getNorm16(c);
19227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            cc=getCCFromYesOrMaybe(norm16);
19237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if( // this character combines backward and
19247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                isMaybe(norm16) &&
19257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // we have seen a starter that combines forward and
19267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                compositionsList>=0 &&
19277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // the backward-combining character is not blocked
19287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                (prevCC<cc || prevCC==0)
19297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ) {
19307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(isJamoVT(norm16)) {
19317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // c is a Jamo V/T, see if we can compose it with the previous character.
19327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(c<Hangul.JAMO_T_BASE) {
19337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
19347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        char prev=(char)(sb.charAt(starter)-Hangul.JAMO_L_BASE);
19357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if(prev<Hangul.JAMO_L_COUNT) {
19367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            pRemove=p-1;
19377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            char syllable=(char)
19387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                (Hangul.HANGUL_BASE+
19397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                 (prev*Hangul.JAMO_V_COUNT+(c-Hangul.JAMO_V_BASE))*
19407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                 Hangul.JAMO_T_COUNT);
19417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            char t;
19427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            if(p!=sb.length() && (t=(char)(sb.charAt(p)-Hangul.JAMO_T_BASE))<Hangul.JAMO_T_COUNT) {
19437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                ++p;
19447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                syllable+=t;  // The next character was a Jamo T.
19457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            }
19467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            sb.setCharAt(starter, syllable);
19477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            // remove the Jamo V/T
19487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            sb.delete(pRemove, p);
19497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            p=pRemove;
19507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
19517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
19527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    /*
19537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                     * No "else" for Jamo T:
19547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                     * Since the input is in NFD, there are no Hangul LV syllables that
19557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                     * a Jamo T could combine with.
19567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                     * All Jamo Ts are combined above when handling Jamo Vs.
19577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                     */
19587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(p==sb.length()) {
19597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        break;
19607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
19617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    compositionsList=-1;
19627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    continue;
19637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if((compositeAndFwd=combine(maybeYesCompositions, compositionsList, c))>=0) {
19647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // The starter and the combining mark (c) do combine.
19657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    int composite=compositeAndFwd>>1;
19667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Remove the combining mark.
19687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    pRemove=p-Character.charCount(c);  // pRemove & p: start & limit of the combining mark
19697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    sb.delete(pRemove, p);
19707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    p=pRemove;
19717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Replace the starter with the composite.
19727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(starterIsSupplementary) {
19737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if(composite>0xffff) {
19747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            // both are supplementary
19757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            sb.setCharAt(starter, UTF16.getLeadSurrogate(composite));
19767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            sb.setCharAt(starter+1, UTF16.getTrailSurrogate(composite));
19777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        } else {
19787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            sb.setCharAt(starter, (char)c);
19797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            sb.deleteCharAt(starter+1);
19807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            // The composite is shorter than the starter,
19817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            // move the intermediate characters forward one.
19827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            starterIsSupplementary=false;
19837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            --p;
19847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
19857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else if(composite>0xffff) {
19867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // The composite is longer than the starter,
19877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // move the intermediate characters back one.
19887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        starterIsSupplementary=true;
19897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        sb.setCharAt(starter, UTF16.getLeadSurrogate(composite));
19907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        sb.insert(starter+1, UTF16.getTrailSurrogate(composite));
19917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        ++p;
19927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else {
19937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // both are on the BMP
19947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        sb.setCharAt(starter, (char)composite);
19957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
19967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Keep prevCC because we removed the combining mark.
19987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
19997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(p==sb.length()) {
20007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        break;
20017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
20027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Is the composite a starter that combines forward?
20037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if((compositeAndFwd&1)!=0) {
20047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        compositionsList=
20057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            getCompositionsListForComposite(getNorm16(composite));
20067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else {
20077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        compositionsList=-1;
20087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
20097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // We combined; continue with looking for compositions.
20117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    continue;
20127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
20137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
20147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // no combination this time
20167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            prevCC=cc;
20177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(p==sb.length()) {
20187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
20197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
20207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // If c did not combine, then check if it is a starter.
20227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(cc==0) {
20237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Found a new starter.
20247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if((compositionsList=getCompositionsListForDecompYes(norm16))>=0) {
20257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // It may combine with something, prepare for it.
20267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(c<=0xffff) {
20277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        starterIsSupplementary=false;
20287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        starter=p-1;
20297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else {
20307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        starterIsSupplementary=true;
20317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        starter=p-2;
20327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
20337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
20347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(onlyContiguous) {
20357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // FCC: no discontiguous compositions; any intervening character blocks.
20367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                compositionsList=-1;
20377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
20387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
20397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        buffer.flush();
20407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
20417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int composePair(int a, int b) {
20437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int norm16=getNorm16(a);  // maps an out-of-range 'a' to inert norm16=0
20447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int list;
20457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(isInert(norm16)) {
20467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return -1;
20477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if(norm16<minYesNoMappingsOnly) {
20487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(isJamoL(norm16)) {
20497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                b-=Hangul.JAMO_V_BASE;
20507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(0<=b && b<Hangul.JAMO_V_COUNT) {
20517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return
20527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        (Hangul.HANGUL_BASE+
20537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         ((a-Hangul.JAMO_L_BASE)*Hangul.JAMO_V_COUNT+b)*
20547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                         Hangul.JAMO_T_COUNT);
20557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
20567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return -1;
20577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
20587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(isHangul(norm16)) {
20597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                b-=Hangul.JAMO_T_BASE;
20607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(Hangul.isHangulWithoutJamoT((char)a) && 0<b && b<Hangul.JAMO_T_COUNT) {  // not b==0!
20617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return a+b;
20627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
20637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return -1;
20647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
20657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
20667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // 'a' has a compositions list in extraData
20677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                list=norm16;
20687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(norm16>minYesNo) {  // composite 'a' has both mapping & compositions list
20697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    list+=  // mapping pointer
20707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        1+  // +1 to skip the first unit with the mapping lenth
20717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        (extraData.charAt(list)&MAPPING_LENGTH_MASK);  // + mapping length
20727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
20737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Turn the offset-into-extraData into an offset-into-maybeYesCompositions.
20747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                list+=MIN_NORMAL_MAYBE_YES-minMaybeYes;
20757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
20767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if(norm16<minMaybeYes || MIN_NORMAL_MAYBE_YES<=norm16) {
20777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return -1;
20787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
20797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            list=norm16-minMaybeYes;  // offset into maybeYesCompositions
20807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
20817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(b<0 || 0x10ffff<b) {  // combine(list, b) requires a valid code point b
20827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return -1;
20837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
20847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return combine(maybeYesCompositions, list, b)>>1;
20857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
20867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
20877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
20887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Does c have a composition boundary before it?
20897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * True if its decomposition begins with a character that has
20907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * ccc=0 && NFC_QC=Yes (isCompYesAndZeroCC()).
20917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * As a shortcut, this is true if c itself has ccc=0 && NFC_QC=Yes
20927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * (isCompYesAndZeroCC()) so we need not decompose.
20937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
20947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean hasCompBoundaryBefore(int c, int norm16) {
20957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
20967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(isCompYesAndZeroCC(norm16)) {
20977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return true;
20987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(isMaybeOrNonZeroCC(norm16)) {
20997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return false;
21007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(isDecompNoAlgorithmic(norm16)) {
21017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                c=mapAlgorithmic(c, norm16);
21027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                norm16=getNorm16(c);
21037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
21047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // c decomposes, get everything from the variable-length extra data
21057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int firstUnit=extraData.charAt(norm16);
21067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if((firstUnit&MAPPING_LENGTH_MASK)==0) {
21077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return false;
21087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
21097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0 && (extraData.charAt(norm16-1)&0xff00)!=0) {
21107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return false;  // non-zero leadCC
21117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
21127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return isCompYesAndZeroCC(getNorm16(Character.codePointAt(extraData, norm16+1)));
21137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
21147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
21157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
21167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int findPreviousCompBoundary(CharSequence s, int p) {
21177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(p>0) {
21187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int c=Character.codePointBefore(s, p);
21197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            p-=Character.charCount(c);
21207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(hasCompBoundaryBefore(c)) {
21217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
21227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
21237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // We could also test hasCompBoundaryAfter() and return iter.codePointLimit,
21247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // but that's probably not worth the extra cost.
21257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
21267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return p;
21277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
21287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int findNextCompBoundary(CharSequence s, int p, int limit) {
21297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(p<limit) {
21307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int c=Character.codePointAt(s, p);
21317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int norm16=normTrie.get(c);
21327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(hasCompBoundaryBefore(c, norm16)) {
21337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
21347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
21357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            p+=Character.charCount(c);
21367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
21377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return p;
21387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
21397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
21407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int findPreviousFCDBoundary(CharSequence s, int p) {
21417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(p>0) {
21427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int c=Character.codePointBefore(s, p);
21437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            p-=Character.charCount(c);
21447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c<MIN_CCC_LCCC_CP || getFCD16(c)<=0xff) {
21457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
21467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
21477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
21487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return p;
21497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
21507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int findNextFCDBoundary(CharSequence s, int p, int limit) {
21517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(p<limit) {
21527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int c=Character.codePointAt(s, p);
21537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(c<MIN_CCC_LCCC_CP || getFCD16(c)<=0xff) {
21547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
21557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
21567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            p+=Character.charCount(c);
21577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
21587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return p;
21597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
21607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
21617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void addToStartSet(Trie2Writable newData, int origin, int decompLead) {
21627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int canonValue=newData.get(decompLead);
21637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if((canonValue&(CANON_HAS_SET|CANON_VALUE_MASK))==0 && origin!=0) {
21647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // origin is the first character whose decomposition starts with
21657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // the character for which we are setting the value.
21667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            newData.set(decompLead, canonValue|origin);
21677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
21687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // origin is not the first character, or it is U+0000.
21697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            UnicodeSet set;
21707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if((canonValue&CANON_HAS_SET)==0) {
21717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int firstOrigin=canonValue&CANON_VALUE_MASK;
21727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                canonValue=(canonValue&~CANON_VALUE_MASK)|CANON_HAS_SET|canonStartSets.size();
21737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                newData.set(decompLead, canonValue);
21747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                canonStartSets.add(set=new UnicodeSet());
21757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(firstOrigin!=0) {
21767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    set.add(firstOrigin);
21777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
21787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
21797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                set=canonStartSets.get(canonValue&CANON_VALUE_MASK);
21807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
21817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            set.add(origin);
21827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
21837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
21847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
21857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @SuppressWarnings("unused")
21867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private VersionInfo dataVersion;
21877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
21887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Code point thresholds for quick check codes.
21897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int minDecompNoCP;
21907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int minCompNoMaybeCP;
21917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
21927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Norm16 value thresholds for quick check combinations and types of extra data.
21937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int minYesNo;
21947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int minYesNoMappingsOnly;
21957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int minNoNo;
21967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int limitNoNo;
21977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int minMaybeYes;
21987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
21997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private Trie2_16 normTrie;
22007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private String maybeYesCompositions;
22017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private String extraData;  // mappings and/or compositions for yesYes, yesNo & noNo characters
22027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private byte[] smallFCD;  // [0x100] one bit per 32 BMP code points, set if any FCD!=0
22037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int[] tccc180;  // [0x180] tccc values for U+0000..U+017F
22047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
22057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private Trie2_32 canonIterData;
22067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private ArrayList<UnicodeSet> canonStartSets;
22077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
22087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // bits in canonIterData
22097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int CANON_NOT_SEGMENT_STARTER = 0x80000000;
22107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int CANON_HAS_COMPOSITIONS = 0x40000000;
22117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int CANON_HAS_SET = 0x200000;
22127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int CANON_VALUE_MASK = 0x1fffff;
22137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
2214