12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */
2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others.
3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License
42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/*
52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *******************************************************************************
62ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *   Copyright (C) 2009-2015, International Business Machines
72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *   Corporation and others.  All Rights Reserved.
82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller *******************************************************************************
92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller */
102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.impl;
122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.io.IOException;
142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.nio.ByteBuffer;
152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.ArrayList;
162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.util.Iterator;
172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UTF16;
192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UnicodeSet;
202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.ICUUncheckedIOException;
212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.VersionInfo;
222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
231537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller/**
2405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * Low-level implementation of the Unicode Normalization Algorithm.
2505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * For the data structure and details see the documentation at the end of
2605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * C++ normalizer2impl.h and in the design doc at
2705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert * http://site.icu-project.org/design/normalization/custom
281537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller * @hide Only a subset of ICU is exposed in Android
29836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller */
302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpublic final class Normalizer2Impl {
312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final class Hangul {
322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* Korean Hangul and Jamo constants */
332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int JAMO_L_BASE=0x1100;     /* "lead" jamo */
342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int JAMO_L_END=0x1112;
352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int JAMO_V_BASE=0x1161;     /* "vowel" jamo */
362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int JAMO_V_END=0x1175;
372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int JAMO_T_BASE=0x11a7;     /* "trail" jamo */
382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int JAMO_T_END=0x11c2;
392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int HANGUL_BASE=0xac00;
412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int HANGUL_END=0xd7a3;
422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int JAMO_L_COUNT=19;
442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int JAMO_V_COUNT=21;
452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int JAMO_T_COUNT=28;
462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int JAMO_L_LIMIT=JAMO_L_BASE+JAMO_L_COUNT;
482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int JAMO_V_LIMIT=JAMO_V_BASE+JAMO_V_COUNT;
492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int JAMO_VT_COUNT=JAMO_V_COUNT*JAMO_T_COUNT;
512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT;
532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static final int HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT;
542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static boolean isHangul(int c) {
562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return HANGUL_BASE<=c && c<HANGUL_LIMIT;
572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        public static boolean isHangulLV(int c) {
592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            c-=HANGUL_BASE;
6005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return 0<=c && c<HANGUL_COUNT && c%JAMO_T_COUNT==0;
612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static boolean isJamoL(int c) {
632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return JAMO_L_BASE<=c && c<JAMO_L_LIMIT;
642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static boolean isJamoV(int c) {
662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return JAMO_V_BASE<=c && c<JAMO_V_LIMIT;
672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        public static boolean isJamoT(int c) {
6905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int t=c-JAMO_T_BASE;
7005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return 0<t && t<JAMO_T_COUNT;  // not JAMO_T_BASE itself
7105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
7205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        public static boolean isJamo(int c) {
7305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return JAMO_L_BASE<=c && c<=JAMO_T_END &&
7405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                (c<=JAMO_L_END || (JAMO_V_BASE<=c && c<=JAMO_V_END) || JAMO_T_BASE<c);
7505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /**
782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Decomposes c, which must be a Hangul syllable, into buffer
792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * and returns the length of the decomposition (2 or 3).
802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static int decompose(int c, Appendable buffer) {
822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            try {
832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c-=HANGUL_BASE;
842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int c2=c%JAMO_T_COUNT;
852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c/=JAMO_T_COUNT;
862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.append((char)(JAMO_L_BASE+c/JAMO_V_COUNT));
872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.append((char)(JAMO_V_BASE+c%JAMO_V_COUNT));
882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(c2==0) {
892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return 2;
902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append((char)(JAMO_T_BASE+c2));
922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return 3;
932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } catch(IOException e) {
952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Will not occur because we do not write to I/O.
962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new ICUUncheckedIOException(e);
972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /**
1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Decomposes c, which must be a Hangul syllable, into buffer.
1022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * This is the raw, not recursive, decomposition. Its length is always 2.
1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static void getRawDecomposition(int c, Appendable buffer) {
1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            try {
1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int orig=c;
1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                c-=HANGUL_BASE;
1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int c2=c%JAMO_T_COUNT;
1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(c2==0) {
1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    c/=JAMO_T_COUNT;
1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append((char)(JAMO_L_BASE+c/JAMO_V_COUNT));
1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append((char)(JAMO_V_BASE+c%JAMO_V_COUNT));
1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append((char)(orig-c2));  // LV syllable
1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append((char)(JAMO_T_BASE+c2));
1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } catch(IOException e) {
1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Will not occur because we do not write to I/O.
1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new ICUUncheckedIOException(e);
1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
    /**
     * Writable buffer that takes care of canonical ordering.
     * Its Appendable methods behave like the C++ implementation's
     * appendZeroCC() methods.
     * <p>
     * If dest is a StringBuilder, then the buffer writes directly to it.
     * Otherwise, the buffer maintains a StringBuilder for intermediate text segments
     * until no further changes are necessary and whole segments are appended.
     * All append() methods write to the StringBuilder; the text reaches a
     * non-StringBuilder Appendable only via flush() or flushAndAppendZeroCC().
     */
1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final class ReorderingBuffer implements Appendable {
1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public ReorderingBuffer(Normalizer2Impl ni, Appendable dest, int destCapacity) {
1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            impl=ni;
1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            app=dest;
1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(app instanceof StringBuilder) {
1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                appIsStringBuilder=true;
1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                str=(StringBuilder)dest;
1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // In Java, the constructor subsumes public void init(int destCapacity) {
1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                str.ensureCapacity(destCapacity);
1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                reorderStart=0;
1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(str.length()==0) {
1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    lastCC=0;
1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    setIterator();
1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    lastCC=previousCC();
1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Set reorderStart after the last code point with cc<=1 if there is one.
1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(lastCC>1) {
1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        while(previousCC()>1) {}
1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    reorderStart=codePointLimit;
1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                appIsStringBuilder=false;
1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                str=new StringBuilder();
1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                reorderStart=0;
1602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                lastCC=0;
1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public boolean isEmpty() { return str.length()==0; }
1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public int length() { return str.length(); }
1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public int getLastCC() { return lastCC; }
1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public StringBuilder getStringBuilder() { return str; }
1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public boolean equals(CharSequence s, int start, int limit) {
1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return UTF16Plus.equal(str, 0, str.length(), s, start, limit);
1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public void append(int c, int cc) {
1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(lastCC<=cc || cc==0) {
1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                str.appendCodePoint(c);
1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                lastCC=cc;
1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(cc<=1) {
1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    reorderStart=str.length();
1802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                insert(c, cc);
1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // s must be in NFD, otherwise change the implementation.
1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public void append(CharSequence s, int start, int limit,
1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                           int leadCC, int trailCC) {
1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(start==limit) {
1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return;
1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(lastCC<=leadCC || leadCC==0) {
1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(trailCC<=1) {
1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    reorderStart=str.length()+(limit-start);
1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if(leadCC<=1) {
1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    reorderStart=str.length()+1;  // Ok if not a code point boundary.
1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
1972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                str.append(s, start, limit);
1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                lastCC=trailCC;
1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int c=Character.codePointAt(s, start);
2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                start+=Character.charCount(c);
2022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                insert(c, leadCC);  // insert first code point
2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                while(start<limit) {
2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    c=Character.codePointAt(s, start);
2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    start+=Character.charCount(c);
2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(start<limit) {
2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // s must be in NFD, otherwise we need to use getCC().
2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        leadCC=getCCFromYesOrMaybe(impl.getNorm16(c));
2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        leadCC=trailCC;
2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
2122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    append(c, leadCC);
2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // The following append() methods work like C++ appendZeroCC().
2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // They assume that the cc or trailCC of their input is 0.
2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Most of them implement Appendable interface methods.
219f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        @Override
2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public ReorderingBuffer append(char c) {
2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            str.append(c);
2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            lastCC=0;
2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            reorderStart=str.length();
2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return this;
2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public void appendZeroCC(int c) {
2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            str.appendCodePoint(c);
2282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            lastCC=0;
2292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            reorderStart=str.length();
2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
231f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        @Override
2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public ReorderingBuffer append(CharSequence s) {
2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(s.length()!=0) {
2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                str.append(s);
2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                lastCC=0;
2362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                reorderStart=str.length();
2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return this;
2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
240f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        @Override
2412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public ReorderingBuffer append(CharSequence s, int start, int limit) {
2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(start!=limit) {
2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                str.append(s, start, limit);
2442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                lastCC=0;
2452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                reorderStart=str.length();
2462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return this;
2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /**
2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Flushes from the intermediate StringBuilder to the Appendable,
2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * if they are different objects.
2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Used after recomposition.
2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Must be called at the end when writing to a non-StringBuilder Appendable.
2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public void flush() {
2562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(appIsStringBuilder) {
2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                reorderStart=str.length();
2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                try {
2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    app.append(str);
2612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    str.setLength(0);
2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    reorderStart=0;
2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } catch(IOException e) {
2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    throw new ICUUncheckedIOException(e);  // Avoid declaring "throws IOException".
2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            lastCC=0;
2682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /**
2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Flushes from the intermediate StringBuilder to the Appendable,
2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * if they are different objects.
2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Then appends the new text to the Appendable or StringBuilder.
2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Normally used after quick check loops find a non-empty sequence.
2742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
2752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public ReorderingBuffer flushAndAppendZeroCC(CharSequence s, int start, int limit) {
2762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(appIsStringBuilder) {
2772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                str.append(s, start, limit);
2782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                reorderStart=str.length();
2792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
2802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                try {
2812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    app.append(str).append(s, start, limit);
2822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    str.setLength(0);
2832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    reorderStart=0;
2842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } catch(IOException e) {
2852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    throw new ICUUncheckedIOException(e);  // Avoid declaring "throws IOException".
2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
2872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            lastCC=0;
2892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return this;
2902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public void remove() {
2922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            str.setLength(0);
2932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            lastCC=0;
2942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            reorderStart=0;
2952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public void removeSuffix(int suffixLength) {
2972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int oldLength=str.length();
2982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            str.delete(oldLength-suffixLength, oldLength);
2992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            lastCC=0;
3002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            reorderStart=str.length();
3012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /*
3042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * TODO: Revisit whether it makes sense to track reorderStart.
3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * It is set to after the last known character with cc<=1,
3062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * which stops previousCC() before it reads that character and looks up its cc.
3072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * previousCC() is normally only called from insert().
3082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * In other words, reorderStart speeds up the insertion of a combining mark
3092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * into a multi-combining mark sequence where it does not belong at the end.
3102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * This might not be worth the trouble.
3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * On the other hand, it's not a huge amount of trouble.
3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         *
3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * We probably need it for UNORM_SIMPLE_APPEND.
3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
3152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Inserts c somewhere before the last character.
3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Requires 0<cc<lastCC which implies reorderStart<limit.
3182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private void insert(int c, int cc) {
3192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for(setIterator(), skipPrevious(); previousCC()>cc;) {}
3202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // insert c at codePointLimit, after the character with prevCC<=cc
3212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(c<=0xffff) {
3222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                str.insert(codePointLimit, (char)c);
3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(cc<=1) {
3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    reorderStart=codePointLimit+1;
3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                str.insert(codePointLimit, Character.toChars(c));
3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(cc<=1) {
3292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    reorderStart=codePointLimit+2;
3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private final Normalizer2Impl impl;
3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private final Appendable app;
3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private final StringBuilder str;
3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private final boolean appIsStringBuilder;
3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private int reorderStart;
3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private int lastCC;
3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // private backward iterator
3422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private void setIterator() { codePointStart=str.length(); }
3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private void skipPrevious() {  // Requires 0<codePointStart.
3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            codePointLimit=codePointStart;
3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            codePointStart=str.offsetByCodePoints(codePointStart, -1);
3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private int previousCC() {  // Returns 0 if there is no previous character.
3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            codePointLimit=codePointStart;
3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(reorderStart>=codePointStart) {
3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return 0;
3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int c=str.codePointBefore(codePointStart);
3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            codePointStart-=Character.charCount(c);
35405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return impl.getCCFromYesOrMaybeCP(c);
3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private int codePointStart, codePointLimit;
3582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
3592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // TODO: Propose as public API on the UTF16 class.
3612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // TODO: Propose widening UTF16 methods that take char to take int.
3622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // TODO: Propose widening UTF16 methods that take String to take CharSequence.
3632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final class UTF16Plus {
3642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /**
3652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Assuming c is a surrogate code point (UTF16.isSurrogate(c)),
3662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * is it a lead surrogate?
3672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @param c code unit or code point
3682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @return true or false
3692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
3702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static boolean isSurrogateLead(int c) { return (c&0x400)==0; }
3712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /**
3722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Compares two CharSequence objects for binary equality.
3732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @param s1 first sequence
3742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @param s2 second sequence
3752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @return true if s1 contains the same text as s2
3762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
3772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static boolean equal(CharSequence s1,  CharSequence s2) {
3782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(s1==s2) {
3792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return true;
3802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int length=s1.length();
3822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(length!=s2.length()) {
3832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
3842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for(int i=0; i<length; ++i) {
3862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(s1.charAt(i)!=s2.charAt(i)) {
3872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return false;
3882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return true;
3912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /**
3932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * Compares two CharSequence subsequences for binary equality.
3942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @param s1 first sequence
3952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @param start1 start offset in first sequence
3962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @param limit1 limit offset in first sequence
3972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @param s2 second sequence
3982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @param start2 start offset in second sequence
3992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @param limit2 limit offset in second sequence
4002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         * @return true if s1.subSequence(start1, limit1) contains the same text
4012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         *              as s2.subSequence(start2, limit2)
4022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller         */
4032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public static boolean equal(CharSequence s1, int start1, int limit1,
4042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    CharSequence s2, int start2, int limit2) {
4052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((limit1-start1)!=(limit2-start2)) {
4062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
4072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(s1==s2 && start1==start2) {
4092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return true;
4102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while(start1<limit1) {
4122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(s1.charAt(start1++)!=s2.charAt(start2++)) {
4132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return false;
4142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
4152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return true;
4172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public Normalizer2Impl() {}
4212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final class IsAcceptable implements ICUBinary.Authenticate {
423f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        @Override
4242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public boolean isDataVersionAcceptable(byte version[]) {
42505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return version[0]==3;
4262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final IsAcceptable IS_ACCEPTABLE = new IsAcceptable();
4292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int DATA_FORMAT = 0x4e726d32;  // "Nrm2"
4302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public Normalizer2Impl load(ByteBuffer bytes) {
4322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        try {
4332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            dataVersion=ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);
4342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int indexesLength=bytes.getInt()/4;  // inIndexes[IX_NORM_TRIE_OFFSET]/4
43505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if(indexesLength<=IX_MIN_LCCC_CP) {
4362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new ICUUncheckedIOException("Normalizer2 data: not enough indexes");
4372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int[] inIndexes=new int[indexesLength];
4392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            inIndexes[0]=indexesLength*4;
4402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for(int i=1; i<indexesLength; ++i) {
4412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                inIndexes[i]=bytes.getInt();
4422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            minDecompNoCP=inIndexes[IX_MIN_DECOMP_NO_CP];
4452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            minCompNoMaybeCP=inIndexes[IX_MIN_COMP_NO_MAYBE_CP];
44605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            minLcccCP=inIndexes[IX_MIN_LCCC_CP];
4472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            minYesNo=inIndexes[IX_MIN_YES_NO];
4492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            minYesNoMappingsOnly=inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY];
4502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            minNoNo=inIndexes[IX_MIN_NO_NO];
45105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            minNoNoCompBoundaryBefore=inIndexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE];
45205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            minNoNoCompNoMaybeCC=inIndexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC];
45305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            minNoNoEmpty=inIndexes[IX_MIN_NO_NO_EMPTY];
4542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            limitNoNo=inIndexes[IX_LIMIT_NO_NO];
4552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            minMaybeYes=inIndexes[IX_MIN_MAYBE_YES];
45605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            assert((minMaybeYes&7)==0);  // 8-aligned for noNoDelta bit fields
45705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            centerNoNoDelta=(minMaybeYes>>DELTA_SHIFT)-MAX_DELTA-1;
4582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Read the normTrie.
4602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int offset=inIndexes[IX_NORM_TRIE_OFFSET];
4612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
4622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            normTrie=Trie2_16.createFromSerialized(bytes);
4632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int trieLength=normTrie.getSerializedLength();
4642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(trieLength>(nextOffset-offset)) {
4652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new ICUUncheckedIOException("Normalizer2 data: not enough bytes for normTrie");
4662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ICUBinary.skipBytes(bytes, (nextOffset-offset)-trieLength);  // skip padding after trie bytes
4682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Read the composition and mapping data.
4702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            offset=nextOffset;
4712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
4722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int numChars=(nextOffset-offset)/2;
4732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(numChars!=0) {
4742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                maybeYesCompositions=ICUBinary.getString(bytes, numChars, 0);
47505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                extraData=maybeYesCompositions.substring((MIN_NORMAL_MAYBE_YES-minMaybeYes)>>OFFSET_SHIFT);
4762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // smallFCD: new in formatVersion 2
4792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            offset=nextOffset;
4802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            smallFCD=new byte[0x100];
4812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            bytes.get(smallFCD);
4822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return this;
4842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } catch(IOException e) {
4852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new ICUUncheckedIOException(e);
4862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public Normalizer2Impl load(String name) {
4892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return load(ICUBinary.getRequiredData(name));
4902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void enumLcccRange(int start, int end, int norm16, UnicodeSet set) {
49305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (norm16 > MIN_NORMAL_MAYBE_YES && norm16 != JAMO_VT) {
49405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            set.add(start, end);
49505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        } else if (minNoNoCompNoMaybeCC <= norm16 && norm16 < limitNoNo) {
4962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int fcd16=getFCD16(start);
4972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(fcd16>0xff) { set.add(start, end); }
4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void enumNorm16PropertyStartsRange(int start, int end, int value, UnicodeSet set) {
5022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* add the start code point to the USet */
5032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        set.add(start);
50405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if(start!=end && isAlgorithmicNoNo(value) && (value & DELTA_TCCC_MASK) > DELTA_TCCC_1) {
5052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Range of code points with same-norm16-value algorithmic decompositions.
5062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // They might have different non-zero FCD16 values.
5072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int prevFCD16=getFCD16(start);
5082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while(++start<=end) {
5092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int fcd16=getFCD16(start);
5102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(fcd16!=prevFCD16) {
5112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    set.add(start);
5122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    prevFCD16=fcd16;
5132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
5142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public void addLcccChars(UnicodeSet set) {
5192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Iterator<Trie2.Range> trieIterator=normTrie.iterator();
5202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Trie2.Range range;
5212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
5222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            enumLcccRange(range.startCodePoint, range.endCodePoint, range.value, set);
5232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public void addPropertyStarts(UnicodeSet set) {
5272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* add the start code point of each same-value range of each trie */
5282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Iterator<Trie2.Range> trieIterator=normTrie.iterator();
5292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Trie2.Range range;
5302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
5312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            enumNorm16PropertyStartsRange(range.startCodePoint, range.endCodePoint, range.value, set);
5322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* add Hangul LV syllables and LV+1 because of skippables */
5352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(int c=Hangul.HANGUL_BASE; c<Hangul.HANGUL_LIMIT; c+=Hangul.JAMO_T_COUNT) {
5362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            set.add(c);
5372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            set.add(c+1);
5382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        set.add(Hangul.HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */
5402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public void addCanonIterPropertyStarts(UnicodeSet set) {
5432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        /* add the start code point of each same-value range of the canonical iterator data trie */
5442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ensureCanonIterData();
5452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // currently only used for the SEGMENT_STARTER property
5462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Iterator<Trie2.Range> trieIterator=canonIterData.iterator(segmentStarterMapper);
5472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        Trie2.Range range;
5482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
5492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            /* add the start code point to the USet */
5502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            set.add(range.startCodePoint);
5512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final Trie2.ValueMapper segmentStarterMapper=new Trie2.ValueMapper() {
554f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        @Override
5552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public int map(int in) {
5562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return in&CANON_NOT_SEGMENT_STARTER;
5572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    };
5592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // low-level properties ------------------------------------------------ ***
5612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Note: Normalizer2Impl.java r30983 (2011-nov-27)
5632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // still had getFCDTrie() which built and cached an FCD trie.
5642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // That provided faster access to FCD data than getFCD16FromNormData()
5652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // but required synchronization and consumed some 10kB of heap memory
5662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // in any process that uses FCD (e.g., via collation).
56705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    // minDecompNoCP etc. and smallFCD[] are intended to help with any loss of performance,
56805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    // at least for ASCII & CJK.
5692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
5712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Builds the canonical-iterator data for this instance.
5722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * This is required before any of {@link #isCanonSegmentStarter(int)} or
5732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * {@link #getCanonStartSet(int, UnicodeSet)} are called,
5742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * or else they crash.
5752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return this
5762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
5772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public synchronized Normalizer2Impl ensureCanonIterData() {
5782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(canonIterData==null) {
5792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            Trie2Writable newData=new Trie2Writable(0, 0);
5802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            canonStartSets=new ArrayList<UnicodeSet>();
5812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            Iterator<Trie2.Range> trieIterator=normTrie.iterator();
5822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            Trie2.Range range;
5832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
5842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                final int norm16=range.value;
58505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                if(isInert(norm16) || (minYesNo<=norm16 && norm16<minNoNo)) {
5862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Inert, or 2-way mapping (including Hangul syllable).
5872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // We do not write a canonStartSet for any yesNo character.
5882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Composites from 2-way mappings are added at runtime from the
5892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // starter's compositions list, and the other characters in
5902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // 2-way mappings get CANON_NOT_SEGMENT_STARTER set because they are
5912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // "maybe" characters.
5922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    continue;
5932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
5942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                for(int c=range.startCodePoint; c<=range.endCodePoint; ++c) {
5952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    final int oldValue=newData.get(c);
5962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int newValue=oldValue;
59705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    if(isMaybeOrNonZeroCC(norm16)) {
5982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // not a segment starter if it occurs in a decomposition or has cc!=0
5992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        newValue|=CANON_NOT_SEGMENT_STARTER;
6002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if(norm16<MIN_NORMAL_MAYBE_YES) {
6012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            newValue|=CANON_HAS_COMPOSITIONS;
6022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
6032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else if(norm16<minYesNo) {
6042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        newValue|=CANON_HAS_COMPOSITIONS;
6052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
6062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // c has a one-way decomposition
6072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        int c2=c;
60805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        // Do not modify the whole-range norm16 value.
6092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        int norm16_2=norm16;
61005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        if (isDecompNoAlgorithmic(norm16_2)) {
61105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            // Maps to an isCompYesAndZeroCC.
61205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            c2 = mapAlgorithmic(c2, norm16_2);
61305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            norm16_2 = getNorm16(c2);
61405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            // No compatibility mappings for the CanonicalIterator.
61505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            assert(!(isHangulLV(norm16_2) || isHangulLVT(norm16_2)));
6162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
61705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        if (norm16_2 > minYesNo) {
6182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // c decomposes, get everything from the variable-length extra data
61905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            int mapping=norm16_2>>OFFSET_SHIFT;
62005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            int firstUnit=extraData.charAt(mapping);
6212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            int length=firstUnit&MAPPING_LENGTH_MASK;
6222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
62305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                if(c==c2 && (extraData.charAt(mapping-1)&0xff)!=0) {
6242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    newValue|=CANON_NOT_SEGMENT_STARTER;  // original c has cc!=0
6252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                }
6262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            }
6272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // Skip empty mappings (no characters in the decomposition).
6282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            if(length!=0) {
62905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                ++mapping;  // skip over the firstUnit
6302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // add c to first code point's start set
63105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                int limit=mapping+length;
63205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                c2=extraData.codePointAt(mapping);
6332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                addToStartSet(newData, c, c2);
6342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // Set CANON_NOT_SEGMENT_STARTER for each remaining code point of a
6352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // one-way mapping. A 2-way mapping is possible here after
6362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // intermediate algorithmic mapping.
6372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                if(norm16_2>=minNoNo) {
63805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                    while((mapping+=Character.charCount(c2))<limit) {
63905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                        c2=extraData.codePointAt(mapping);
6402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                        int c2Value=newData.get(c2);
6412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                        if((c2Value&CANON_NOT_SEGMENT_STARTER)==0) {
6422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                            newData.set(c2, c2Value|CANON_NOT_SEGMENT_STARTER);
6432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                        }
6442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    }
6452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                }
6462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            }
6472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        } else {
6482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // c decomposed to c2 algorithmically; c has cc==0
6492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            addToStartSet(newData, c, c2);
6502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
6512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
6522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(newValue!=oldValue) {
6532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        newData.set(c, newValue);
6542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
6552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            canonIterData=newData.toTrie2_32();
6582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return this;
6602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
6612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int getNorm16(int c) { return normTrie.get(c); }
6632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int getCompQuickCheck(int norm16) {
6652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(norm16<minNoNo || MIN_YES_YES_WITH_CC<=norm16) {
6662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 1;  // yes
6672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if(minMaybeYes<=norm16) {
6682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 2;  // maybe
6692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
6702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 0;  // no
6712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
6732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean isAlgorithmicNoNo(int norm16) { return limitNoNo<=norm16 && norm16<minMaybeYes; }
6742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean isCompNo(int norm16) { return minNoNo<=norm16 && norm16<minMaybeYes; }
6752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean isDecompYes(int norm16) { return norm16<minYesNo || minMaybeYes<=norm16; }
6762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int getCC(int norm16) {
6782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(norm16>=MIN_NORMAL_MAYBE_YES) {
67905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return getCCFromNormalYesOrMaybe(norm16);
6802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(norm16<minNoNo || limitNoNo<=norm16) {
6822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 0;
6832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return getCCFromNoNo(norm16);
6852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
68605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public static int getCCFromNormalYesOrMaybe(int norm16) {
68705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return (norm16 >> OFFSET_SHIFT) & 0xff;
68805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    }
6892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static int getCCFromYesOrMaybe(int norm16) {
69005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return norm16>=MIN_NORMAL_MAYBE_YES ? getCCFromNormalYesOrMaybe(norm16) : 0;
69105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    }
69205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public int getCCFromYesOrMaybeCP(int c) {
69305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (c < minCompNoMaybeCP) { return 0; }
69405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return getCCFromYesOrMaybe(getNorm16(c));
6952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
6962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
6982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns the FCD data for code point c.
6992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c A Unicode code point.
7002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
7012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
7022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int getFCD16(int c) {
70305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if(c<minDecompNoCP) {
7042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 0;
7052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if(c<=0xffff) {
7062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(!singleLeadMightHaveNonZeroFCD16(c)) { return 0; }
7072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return getFCD16FromNormData(c);
7092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /** Returns true if the single-or-lead code unit c might have non-zero FCD data. */
7112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean singleLeadMightHaveNonZeroFCD16(int lead) {
7122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // 0<=lead<=0xffff
7132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        byte bits=smallFCD[lead>>8];
7142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(bits==0) { return false; }
7152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return ((bits>>((lead>>5)&7))&1)!=0;
7162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /** Gets the FCD value from the regular normalization data. */
7192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int getFCD16FromNormData(int c) {
72005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int norm16=getNorm16(c);
72105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (norm16 >= limitNoNo) {
72205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if(norm16>=MIN_NORMAL_MAYBE_YES) {
7232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // combining mark
72405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                norm16=getCCFromNormalYesOrMaybe(norm16);
7252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return norm16|(norm16<<8);
7262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(norm16>=minMaybeYes) {
7272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return 0;
72805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            } else {  // isDecompNoAlgorithmic(norm16)
72905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                int deltaTrailCC = norm16 & DELTA_TCCC_MASK;
73005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                if (deltaTrailCC <= DELTA_TCCC_1) {
73105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    return deltaTrailCC >> OFFSET_SHIFT;
7322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
73305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // Maps to an isCompYesAndZeroCC.
73405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                c=mapAlgorithmic(c, norm16);
73505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                norm16=getNorm16(c);
7362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
73805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if(norm16<=minYesNo || isHangulLVT(norm16)) {
73905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // no decomposition or Hangul syllable, all zeros
74005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return 0;
74105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
74205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // c decomposes, get everything from the variable-length extra data
74305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int mapping=norm16>>OFFSET_SHIFT;
74405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int firstUnit=extraData.charAt(mapping);
74505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int fcd16=firstUnit>>8;  // tccc
74605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
74705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            fcd16|=extraData.charAt(mapping-1)&0xff00;  // lccc
74805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
74905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return fcd16;
7502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
7532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Gets the decomposition for one code point.
7542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c code point
7552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return c's decomposition, if it has one; returns null if it does not have a decomposition
7562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
7572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public String getDecomposition(int c) {
7582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int norm16;
75905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if(c<minDecompNoCP || isMaybeOrNonZeroCC(norm16=getNorm16(c))) {
76005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // c does not decompose
76105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return null;
76205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
76305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int decomp = -1;
76405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if(isDecompNoAlgorithmic(norm16)) {
76505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // Maps to an isCompYesAndZeroCC.
76605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            decomp=c=mapAlgorithmic(c, norm16);
76705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // The mapping might decompose further.
76805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            norm16 = getNorm16(c);
76905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
77005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (norm16 < minYesNo) {
7712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(decomp<0) {
7722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return null;
7732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
7742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return UTF16.valueOf(decomp);
7752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
77605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        } else if(isHangulLV(norm16) || isHangulLVT(norm16)) {
77705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // Hangul syllable: decompose algorithmically
77805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            StringBuilder buffer=new StringBuilder();
77905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            Hangul.decompose(c, buffer);
78005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return buffer.toString();
7812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
78205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // c decomposes, get everything from the variable-length extra data
78305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int mapping=norm16>>OFFSET_SHIFT;
78405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int length=extraData.charAt(mapping++)&MAPPING_LENGTH_MASK;
78505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return extraData.substring(mapping, mapping+length);
7862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
7892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Gets the raw decomposition for one code point.
7902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c code point
7912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return c's raw decomposition, if it has one; returns null if it does not have a decomposition
7922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
7932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public String getRawDecomposition(int c) {
7942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int norm16;
7952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(c<minDecompNoCP || isDecompYes(norm16=getNorm16(c))) {
7962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // c does not decompose
7972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return null;
79805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        } else if(isHangulLV(norm16) || isHangulLVT(norm16)) {
7992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Hangul syllable: decompose algorithmically
8002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            StringBuilder buffer=new StringBuilder();
8012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            Hangul.getRawDecomposition(c, buffer);
8022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return buffer.toString();
8032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if(isDecompNoAlgorithmic(norm16)) {
8042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return UTF16.valueOf(mapAlgorithmic(c, norm16));
80505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
80605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // c decomposes, get everything from the variable-length extra data
80705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int mapping=norm16>>OFFSET_SHIFT;
80805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int firstUnit=extraData.charAt(mapping);
80905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int mLength=firstUnit&MAPPING_LENGTH_MASK;  // length of normal mapping
81005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if((firstUnit&MAPPING_HAS_RAW_MAPPING)!=0) {
81105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // Read the raw mapping from before the firstUnit and before the optional ccc/lccc word.
81205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // Bit 7=MAPPING_HAS_CCC_LCCC_WORD
81305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int rawMapping=mapping-((firstUnit>>7)&1)-1;
81405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            char rm0=extraData.charAt(rawMapping);
81505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if(rm0<=MAPPING_LENGTH_MASK) {
81605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                return extraData.substring(rawMapping-rm0, rawMapping);
8172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
81805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // Copy the normal mapping and replace its first two code units with rm0.
81905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                StringBuilder buffer=new StringBuilder(mLength-1).append(rm0);
82005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                mapping+=1+2;  // skip over the firstUnit and the first two mapping code units
82105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                return buffer.append(extraData, mapping, mapping+mLength-2).toString();
8222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
82305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        } else {
82405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            mapping+=1;  // skip over the firstUnit
82505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return extraData.substring(mapping, mapping+mLength);
8262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
8302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns true if code point c starts a canonical-iterator string segment.
8312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <b>{@link #ensureCanonIterData()} must have been called before this method,
8322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * or else this method will crash.</b>
8332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c A Unicode code point.
8342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return true if c starts a canonical-iterator string segment.
8352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
8362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean isCanonSegmentStarter(int c) {
8372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return canonIterData.get(c)>=0;
8382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
8402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns true if there are characters whose decomposition starts with c.
8412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If so, then the set is cleared and then filled with those characters.
8422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <b>{@link #ensureCanonIterData()} must have been called before this method,
8432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * or else this method will crash.</b>
8442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c A Unicode code point.
8452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param set A UnicodeSet to receive the characters whose decompositions
8462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *        start with c, if there are any.
8472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return true if there are characters whose decomposition starts with c.
8482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
8492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean getCanonStartSet(int c, UnicodeSet set) {
8502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int canonValue=canonIterData.get(c)&~CANON_NOT_SEGMENT_STARTER;
8512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(canonValue==0) {
8522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return false;
8532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        set.clear();
8552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int value=canonValue&CANON_VALUE_MASK;
8562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if((canonValue&CANON_HAS_SET)!=0) {
8572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            set.addAll(canonStartSets.get(value));
8582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if(value!=0) {
8592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            set.add(value);
8602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if((canonValue&CANON_HAS_COMPOSITIONS)!=0) {
8622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int norm16=getNorm16(c);
8632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(norm16==JAMO_L) {
8642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int syllable=Hangul.HANGUL_BASE+(c-Hangul.JAMO_L_BASE)*Hangul.JAMO_VT_COUNT;
8652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                set.add(syllable, syllable+Hangul.JAMO_VT_COUNT-1);
8662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
8672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                addComposites(getCompositionsList(norm16), set);
8682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
8712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
87305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    // Fixed norm16 values.
    // getCompQuickCheck() reports "yes" for every norm16 at or above MIN_YES_YES_WITH_CC.
87405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public static final int MIN_YES_YES_WITH_CC=0xfe02;
87505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public static final int JAMO_VT=0xfe00;
    // At or above MIN_NORMAL_MAYBE_YES, getCCFromNormalYesOrMaybe() reads the cc
    // as (norm16>>OFFSET_SHIFT)&0xff.
87605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public static final int MIN_NORMAL_MAYBE_YES=0xfc00;
    // getCanonStartSet() compares norm16 against JAMO_L to add a whole range of Hangul syllables.
87705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public static final int JAMO_L=2;  // offset=1 hasCompBoundaryAfter=FALSE
87805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public static final int INERT=1;  // offset=0 hasCompBoundaryAfter=TRUE
87905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
88005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    // norm16 bit 0 is comp-boundary-after.
88105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public static final int HAS_COMP_BOUNDARY_AFTER=1;
    // norm16>>OFFSET_SHIFT drops bit 0; the mapping readers use the result as an index into extraData.
88205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public static final int OFFSET_SHIFT=1;
88305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
88405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    // For algorithmic one-way mappings, norm16 bits 2..1 indicate the
88505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    // tccc (0, 1, >1) for quick FCC boundary-after tests.
    // getFCD16FromNormData() returns (norm16&DELTA_TCCC_MASK)>>OFFSET_SHIFT directly
    // when the masked value is DELTA_TCCC_0 or DELTA_TCCC_1.
88605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public static final int DELTA_TCCC_0=0;
88705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public static final int DELTA_TCCC_1=2;
88805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public static final int DELTA_TCCC_GT_1=4;
88905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public static final int DELTA_TCCC_MASK=6;
    // NOTE(review): DELTA_SHIFT and MAX_DELTA are presumably consumed by mapAlgorithmic(),
    // whose body is outside this section -- confirm there.
89005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public static final int DELTA_SHIFT=3;
8912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int MAX_DELTA=0x40;
8932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Byte offsets from the start of the data, after the generic header.
8952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IX_NORM_TRIE_OFFSET=0;
8962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IX_EXTRA_DATA_OFFSET=1;
8972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IX_SMALL_FCD_OFFSET=2;
8982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IX_RESERVED3_OFFSET=3;
8992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IX_TOTAL_SIZE=7;
9002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Code point thresholds for quick check codes.
9022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IX_MIN_DECOMP_NO_CP=8;
9032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IX_MIN_COMP_NO_MAYBE_CP=9;
9042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Norm16 value thresholds for quick check combinations and types of extra data.
90605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
90705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    /** Mappings & compositions in [minYesNo..minYesNoMappingsOnly[. */
9082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IX_MIN_YES_NO=10;
90905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    /** Mappings are comp-normalized. */
9102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IX_MIN_NO_NO=11;
9112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IX_LIMIT_NO_NO=12;
9122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IX_MIN_MAYBE_YES=13;
9132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
91405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    /** Mappings only in [minYesNoMappingsOnly..minNoNo[. */
9152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int IX_MIN_YES_NO_MAPPINGS_ONLY=14;
91605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    /** Mappings are not comp-normalized but have a comp boundary before. */
91705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public static final int IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE=15;
91805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    /** Mappings do not have a comp boundary before. */
91905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public static final int IX_MIN_NO_NO_COMP_NO_MAYBE_CC=16;
92005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    /** Mappings to the empty string. */
92105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public static final int IX_MIN_NO_NO_EMPTY=17;
9222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
92305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public static final int IX_MIN_LCCC_CP=18;
92405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public static final int IX_COUNT=20;
9252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
    // Flags and length mask found in the first unit of a mapping stored in extraData.
    // MAPPING_HAS_CCC_LCCC_WORD: the unit before the first unit carries lccc in its upper byte.
9262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int MAPPING_HAS_CCC_LCCC_WORD=0x80;
    // MAPPING_HAS_RAW_MAPPING: a raw mapping is stored before the first unit
    // (and before the optional ccc/lccc word); see getRawDecomposition().
9272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int MAPPING_HAS_RAW_MAPPING=0x40;
92805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    // unused bit 0x20;
    // MAPPING_LENGTH_MASK: low bits of the first unit = length of the normal mapping in code units.
9292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int MAPPING_LENGTH_MASK=0x1f;
9302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
    // NOTE(review): the COMP_1_*/COMP_2_* values presumably describe the encoding of the
    // compositions lists; their readers are outside this section -- confirm there.
9312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int COMP_1_LAST_TUPLE=0x8000;
9322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int COMP_1_TRIPLE=1;
9332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int COMP_1_TRAIL_LIMIT=0x3400;
9342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int COMP_1_TRAIL_MASK=0x7ffe;
9352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int COMP_1_TRAIL_SHIFT=9;  // 10-1 for the "triple" bit
9362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int COMP_2_TRAIL_SHIFT=6;
9372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public static final int COMP_2_TRAIL_MASK=0xffc0;
9382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // higher-level functionality ------------------------------------------ ***
9402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // NFD without an NFD Normalizer2 instance.
9422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public Appendable decompose(CharSequence s, StringBuilder dest) {
9432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        decompose(s, 0, s.length(), dest, s.length());
9442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return dest;
9452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
9472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Decomposes s[src, limit[ and writes the result to dest.
9482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * limit can be NULL if src is NUL-terminated.
9492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * destLengthEstimate is the initial dest buffer capacity and can be -1.
9502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
9512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public void decompose(CharSequence s, int src, int limit, StringBuilder dest,
9522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                   int destLengthEstimate) {
9532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(destLengthEstimate<0) {
9542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            destLengthEstimate=limit-src;
9552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        dest.setLength(0);
9572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ReorderingBuffer buffer=new ReorderingBuffer(this, dest, destLengthEstimate);
9582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        decompose(s, src, limit, buffer);
9592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Dual functionality:
9622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // buffer!=NULL: normalize
9632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes
9642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int decompose(CharSequence s, int src, int limit,
9652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                         ReorderingBuffer buffer) {
9662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int minNoCP=minDecompNoCP;
9672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int prevSrc;
9692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int c=0;
9702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int norm16=0;
9712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // only for quick check
9732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int prevBoundary=src;
9742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int prevCC=0;
9752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(;;) {
9772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // count code units below the minimum or with irrelevant data for the quick check
9782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for(prevSrc=src; src!=limit;) {
9792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if( (c=s.charAt(src))<minNoCP ||
9802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    isMostDecompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
9812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ) {
9822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ++src;
9832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if(!UTF16.isSurrogate((char)c)) {
9842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
9852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
9862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    char c2;
9872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(UTF16Plus.isSurrogateLead(c)) {
9882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) {
9892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            c=Character.toCodePoint((char)c, c2);
9902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
9912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else /* trail surrogate */ {
9922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) {
9932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            --src;
9942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            c=Character.toCodePoint(c2, (char)c);
9952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
9962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
9972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(isMostDecompYesAndZeroCC(norm16=getNorm16(c))) {
9982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        src+=Character.charCount(c);
9992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
10002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
10012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
10022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
10032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
10042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // copy these code units all at once
10052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(src!=prevSrc) {
10062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(buffer!=null) {
10072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.flushAndAppendZeroCC(s, prevSrc, src);
10082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
10092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    prevCC=0;
10102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    prevBoundary=src;
10112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
10122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
10132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(src==limit) {
10142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
10152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
10162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Check one above-minimum, relevant code point.
10182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            src+=Character.charCount(c);
10192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(buffer!=null) {
10202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                decompose(c, norm16, buffer);
10212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
10222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(isDecompYes(norm16)) {
10232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int cc=getCCFromYesOrMaybe(norm16);
10242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(prevCC<=cc || cc==0) {
10252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        prevCC=cc;
10262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if(cc<=1) {
10272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            prevBoundary=src;
10282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
10292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        continue;
10302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
10312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
10322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return prevBoundary;  // "no" or cc out of order
10332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
10342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return src;
10362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
10372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public void decomposeAndAppend(CharSequence s, boolean doDecompose, ReorderingBuffer buffer) {
10382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int limit=s.length();
10392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(limit==0) {
10402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return;
10412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(doDecompose) {
10432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            decompose(s, 0, limit, buffer);
10442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return;
10452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Just merge the strings at the boundary.
10472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int c=Character.codePointAt(s, 0);
10482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int src=0;
10492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int firstCC, prevCC, cc;
10502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        firstCC=prevCC=cc=getCC(getNorm16(c));
10512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(cc!=0) {
10522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            prevCC=cc;
10532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            src+=Character.charCount(c);
10542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(src>=limit) {
10552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
10562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
10572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            c=Character.codePointAt(s, src);
10582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            cc=getCC(getNorm16(c));
10592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        };
10602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer.append(s, 0, src, firstCC, prevCC);
10612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer.append(s, src, limit);
10622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
106305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
10642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Very similar to composeQuickCheck(): Make the same changes in both places if relevant.
10652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // doCompose: normalize
10662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // !doCompose: isNormalized (buffer must be empty and initialized)
    //
    // Scans s from index src up to (not including) limit.
    // onlyContiguous is forwarded to the boundary and recompose helpers; the inline notes
    // below tag that mode as FCC.
    // With doCompose, the result is appended to buffer and the method always returns true
    // (every "return false" below is guarded by !doCompose).
    // Without doCompose, the method returns false as soon as some piece of s would be changed;
    // buffer is then only written in the slow path, as scratch space, and emptied again via remove().
10672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean compose(CharSequence s, int src, int limit,
10682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                           boolean onlyContiguous,
10692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                           boolean doCompose,
10702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                           ReorderingBuffer buffer) {
        // prevBoundary: start of the input that has been scanned but not yet copied to buffer.
10712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int prevBoundary=src;
107205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int minNoMaybeCP=minCompNoMaybeCP;
10732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
107405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        for (;;) {
107505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point,
107605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // or with (compYes && ccc==0) properties.
107705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int prevSrc;
107805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int c = 0;
107905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int norm16 = 0;
108005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            for (;;) {
108105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                if (src == limit) {
                    // End of input: flush whatever is still pending and report success.
108205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    if (prevBoundary != limit && doCompose) {
108305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        buffer.append(s, prevBoundary, limit);
108405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    }
108505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    return true;
108605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                }
10872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if( (c=s.charAt(src))<minNoMaybeCP ||
10882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    isCompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
10892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ) {
10902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ++src;
10912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
                    // Remember where the current code point starts; src moves past its first code unit.
109205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    prevSrc = src++;
109305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    if(!UTF16.isSurrogate((char)c)) {
109405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        break;
109505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    } else {
                        // Surrogate code unit: assemble the supplementary code point if it is paired,
                        // then look up the properties of the full code point.
109605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        char c2;
109705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        if(UTF16Plus.isSurrogateLead(c)) {
109805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            if(src!=limit && Character.isLowSurrogate(c2=s.charAt(src))) {
109905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                ++src;
110005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                c=Character.toCodePoint((char)c, c2);
110105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            }
110205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        } else /* trail surrogate */ {
                            // Pair with a preceding lead surrogate only if that unit lies inside
                            // the not-yet-copied span [prevBoundary, prevSrc[.
110305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            if(prevBoundary<prevSrc && Character.isHighSurrogate(c2=s.charAt(prevSrc-1))) {
110405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                --prevSrc;
110505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                c=Character.toCodePoint(c2, (char)c);
110605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            }
11072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
110805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        if(!isCompYesAndZeroCC(norm16=getNorm16(c))) {
110905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            break;
11102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
11112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
11122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
11132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
111405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
111505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // The current character is either a "noNo" (has a mapping)
111605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // or a "maybeYes" (combines backward)
111705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // or a "yesYes" with ccc!=0.
111805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // It is not a Hangul syllable or Jamo L because those have "yes" properties.
            // At this point [prevSrc, src[ spans exactly the current code point c.
111905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
112005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // Medium-fast path: Handle cases that do not require full decomposition and recomposition.
112105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (!isMaybeOrNonZeroCC(norm16)) {  // minNoNo <= norm16 < minMaybeYes
                // A character with a mapping can never be part of normalized text.
112205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                if (!doCompose) {
112305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    return false;
11242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
112505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // Fast path for mapping a character that is immediately surrounded by boundaries.
112605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // In this case, we need not decompose around the current character.
112705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                if (isDecompNoAlgorithmic(norm16)) {
112805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // Maps to a single isCompYesAndZeroCC character
112905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // which also implies hasCompBoundaryBefore.
113005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
113105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            hasCompBoundaryBefore(s, src, limit)) {
                        // Copy the pending unchanged text, then the mapped character.
113205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        if (prevBoundary != prevSrc) {
113305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            buffer.append(s, prevBoundary, prevSrc);
113405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        }
113505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        buffer.append(mapAlgorithmic(c, norm16), 0);
113605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        prevBoundary = src;
113705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        continue;
113805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    }
113905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                } else if (norm16 < minNoNoCompBoundaryBefore) {
114005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // The mapping is comp-normalized which also implies hasCompBoundaryBefore.
114105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) ||
114205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            hasCompBoundaryBefore(s, src, limit)) {
114305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        if (prevBoundary != prevSrc) {
114405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            buffer.append(s, prevBoundary, prevSrc);
114505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        }
                        // The first unit at the extraData offset carries the mapping length
                        // (masked with MAPPING_LENGTH_MASK); the mapping text follows it.
114605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        int mapping = norm16 >> OFFSET_SHIFT;
114705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        int length = extraData.charAt(mapping++) & MAPPING_LENGTH_MASK;
114805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        buffer.append(extraData, mapping, mapping + length);
114905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        prevBoundary = src;
115005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        continue;
115105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    }
115205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                } else if (norm16 >= minNoNoEmpty) {
115305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // The current character maps to nothing.
115405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // Simply omit it from the output if there is a boundary before _or_ after it.
115505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // The character itself implies no boundaries.
115605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    if (hasCompBoundaryBefore(s, src, limit) ||
115705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            hasCompBoundaryAfter(s, prevBoundary, prevSrc, onlyContiguous)) {
115805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        if (prevBoundary != prevSrc) {
115905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            buffer.append(s, prevBoundary, prevSrc);
116005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        }
116105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        prevBoundary = src;
116205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        continue;
116305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    }
11642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
116505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // Other "noNo" type, or need to examine more text around this character:
116605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // Fall through to the slow path.
116705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            } else if (isJamoVT(norm16) && prevBoundary != prevSrc) {
                // prevBoundary != prevSrc guarantees there is a pending code unit right before c.
11682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                char prev=s.charAt(prevSrc-1);
11692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(c<Hangul.JAMO_T_BASE) {
117005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // The current character is a Jamo Vowel,
117105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // compose with previous Jamo L and following Jamo T.
                    // char arithmetic is unsigned: a prev below JAMO_L_BASE wraps to a large value
                    // and therefore fails the range check on the next line.
117205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    char l = (char)(prev-Hangul.JAMO_L_BASE);
117305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    if(l<Hangul.JAMO_L_COUNT) {
117405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        if (!doCompose) {
11752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            return false;
11762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
                        // t > 0: index of a following Jamo T; t == 0: no trailing consonant;
                        // t < 0: cannot decide here, leave it to the slow path.
117705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        int t;
117805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        if (src != limit &&
117905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                0 < (t = (s.charAt(src) - Hangul.JAMO_T_BASE)) &&
118005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                t < Hangul.JAMO_T_COUNT) {
118105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            // The next character is a Jamo T.
11822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            ++src;
118305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        } else if (hasCompBoundaryBefore(s, src, limit)) {
118405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            // No Jamo T follows, not even via decomposition.
118505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            t = 0;
118605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        } else {
118705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            t = -1;
118805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        }
118905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        if (t >= 0) {
                            // Arithmetic Hangul composition from the L, V and T indexes.
119005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            int syllable = Hangul.HANGUL_BASE +
119105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                (l*Hangul.JAMO_V_COUNT + (c-Hangul.JAMO_V_BASE)) *
119205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                Hangul.JAMO_T_COUNT + t;
119305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            --prevSrc;  // Replace the Jamo L as well.
119405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            if (prevBoundary != prevSrc) {
119505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                buffer.append(s, prevBoundary, prevSrc);
119605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            }
119705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            buffer.append((char)syllable);
119805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            prevBoundary = src;
11992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            continue;
12002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
12012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // If we see L+V+x where x!=T then we drop to the slow path,
12022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // decompose and recompose.
12032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // This is to deal with NFKC finding normal L and V but a
120405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        // compatibility variant of a T.
120505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        // We need to either fully compose that combination here
120605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        // (which would complicate the code and may not work with strange custom data)
120705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        // or use the slow path.
12082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
120905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                } else if (Hangul.isHangulLV(prev)) {
121005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // The current character is a Jamo Trailing consonant,
12112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // compose with previous Hangul LV that does not contain a Jamo T.
121205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    if (!doCompose) {
12132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        return false;
12142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
                    // Adding the T index to the LV syllable yields the LVT syllable.
121505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    int syllable = prev + c - Hangul.JAMO_T_BASE;
121605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    --prevSrc;  // Replace the Hangul LV as well.
121705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    if (prevBoundary != prevSrc) {
121805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        buffer.append(s, prevBoundary, prevSrc);
12192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
122005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    buffer.append((char)syllable);
122105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    prevBoundary = src;
12222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    continue;
12232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
122405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // No matching context, or may need to decompose surrounding text first:
122505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // Fall through to the slow path.
122605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            } else if (norm16 > JAMO_VT) {  // norm16 >= MIN_YES_YES_WITH_CC
122705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // One or more combining marks that do not combine-back:
122805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // Check for canonical order, copy unchanged if ok and
122905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                // if followed by a character with a boundary-before.
123005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                int cc = getCCFromNormalYesOrMaybe(norm16);  // cc!=0
123105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                if (onlyContiguous /* FCC */ && getPreviousTrailCC(s, prevBoundary, prevSrc) > cc) {
12322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Fails FCD test, need to decompose and contiguously recompose.
123305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    if (!doCompose) {
12342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        return false;
12352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
12362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
123705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // If !onlyContiguous (not FCC), then we ignore the tccc of
123805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // the previous character which passed the quick check "yes && ccc==0" test.
                    // n16 holds the properties of the code point at src that ended the scan below.
123905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    int n16;
124005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    for (;;) {
124105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        if (src == limit) {
124205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            if (doCompose) {
124305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                buffer.append(s, prevBoundary, limit);
124405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            }
124505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            return true;
124605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        }
124705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        int prevCC = cc;
124805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        c = Character.codePointAt(s, src);
124905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        n16 = normTrie.get(c);
125005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        if (n16 >= MIN_YES_YES_WITH_CC) {
125105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            cc = getCCFromNormalYesOrMaybe(n16);
125205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            if (prevCC > cc) {
                                // Combining classes out of canonical order.
125305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                if (!doCompose) {
125405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                    return false;
125505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                }
125605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                break;
125705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            }
125805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        } else {
125905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            break;
126005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        }
126105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        src += Character.charCount(c);
126205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    }
126305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // src is after the last in-order combining mark.
126405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // If there is a boundary here, then we continue with no change.
126505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    if (norm16HasCompBoundaryBefore(n16)) {
                        // The code point at src was already looked up; skip it here if the
                        // fast path would accept it anyway.
126605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        if (isCompYesAndZeroCC(n16)) {
126705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            src += Character.charCount(c);
126805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        }
126905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        continue;
127005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    }
127105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // Use the slow path. There is no boundary in [prevSrc, src[.
12722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
12732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
12742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
127505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // Slow path: Find the nearest boundaries around the current character,
127605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // decompose and recompose.
            // If the current character has no boundary before it and the preceding code point
            // has no boundary after it, pull that preceding code point into the segment.
127705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) {
127805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                c = Character.codePointBefore(s, prevSrc);
127905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                norm16 = normTrie.get(c);
128005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
128105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    prevSrc -= Character.charCount(c);
128205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                }
128305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
128405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (doCompose && prevBoundary != prevSrc) {
128505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                buffer.append(s, prevBoundary, prevSrc);
12862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
            // Everything appended from this index on is the segment handed to recompose().
12872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int recomposeStartIndex=buffer.length();
128805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // We know there is not a boundary here.
128905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            decomposeShort(s, prevSrc, src, false /* !stopAtCompBoundary */, onlyContiguous,
129005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                           buffer);
129105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // Decompose until the next boundary.
129205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            src = decomposeShort(s, src, limit, true /* stopAtCompBoundary */, onlyContiguous,
129305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                 buffer);
12942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            recompose(buffer, recomposeStartIndex, onlyContiguous);
12952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(!doCompose) {
                // isNormalized mode: the segment is normalized only if decomposing and
                // recomposing it reproduces the input slice; afterwards discard the scratch text.
129605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                if(!buffer.equals(s, prevSrc, src)) {
12972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return false;
12982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
12992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.remove();
13002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
13012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            prevBoundary=src;
13022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
13032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
130405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
13052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
13062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Very similar to compose(): Make the same changes in both places if relevant.
13072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * doSpan: spanQuickCheckYes (ignore bit 0 of the return value)
13082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * !doSpan: quickCheck
13092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return bits 31..1: spanQuickCheckYes (==s.length() if "yes") and
13102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *         bit 0: set if "maybe"; otherwise, if the span length&lt;s.length()
13112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *         then the quick check result is "no"
13122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
13132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int composeQuickCheck(CharSequence s, int src, int limit,
13142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                 boolean onlyContiguous, boolean doSpan) {
13152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int qcResult=0;
13162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int prevBoundary=src;
131705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int minNoMaybeCP=minCompNoMaybeCP;
13182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(;;) {
132005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point,
132105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // or with (compYes && ccc==0) properties.
132205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int prevSrc;
132305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int c = 0;
132405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int norm16 = 0;
132505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            for (;;) {
13262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(src==limit) {
13272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return (src<<1)|qcResult;  // "yes" or "maybe"
13282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
13292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if( (c=s.charAt(src))<minNoMaybeCP ||
13302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    isCompYesAndZeroCC(norm16=normTrie.getFromU16SingleLead((char)c))
13312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ) {
13322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ++src;
13332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
133405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    prevSrc = src++;
133505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    if(!UTF16.isSurrogate((char)c)) {
133605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        break;
133705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    } else {
133805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        char c2;
133905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        if(UTF16Plus.isSurrogateLead(c)) {
134005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            if(src!=limit && Character.isLowSurrogate(c2=s.charAt(src))) {
134105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                ++src;
134205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                c=Character.toCodePoint((char)c, c2);
134305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            }
134405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        } else /* trail surrogate */ {
134505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            if(prevBoundary<prevSrc && Character.isHighSurrogate(c2=s.charAt(prevSrc-1))) {
134605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                --prevSrc;
134705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                c=Character.toCodePoint(c2, (char)c);
134805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            }
13492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
135005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        if(!isCompYesAndZeroCC(norm16=getNorm16(c))) {
135105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            break;
13522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
13532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
13542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
13552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
135605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo.
135705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // The current character is either a "noNo" (has a mapping)
135805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // or a "maybeYes" (combines backward)
135905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // or a "yesYes" with ccc!=0.
136005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // It is not a Hangul syllable or Jamo L because those have "yes" properties.
136105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
136205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int prevNorm16 = INERT;
136305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (prevBoundary != prevSrc) {
136405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                prevBoundary = prevSrc;
136505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                if (!norm16HasCompBoundaryBefore(norm16)) {
136605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    c = Character.codePointBefore(s, prevSrc);
136705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    int n16 = getNorm16(c);
136805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    if (!norm16HasCompBoundaryAfter(n16, onlyContiguous)) {
136905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        prevBoundary -= Character.charCount(c);
137005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        prevNorm16 = n16;
137105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    }
13722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
13732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
13742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(isMaybeOrNonZeroCC(norm16)) {
13762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int cc=getCCFromYesOrMaybe(norm16);
137705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                if (onlyContiguous /* FCC */ && cc != 0 &&
137805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        getTrailCCFromCompYesAndZeroCC(prevNorm16) > cc) {
137905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // The [prevBoundary..prevSrc[ character
138005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // passed the quick check "yes && ccc==0" test
138105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // but is out of canonical order with the current combining mark.
138205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                } else {
138305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // If !onlyContiguous (not FCC), then we ignore the tccc of
138405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // the previous character which passed the quick check "yes && ccc==0" test.
138505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    for (;;) {
138605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        if (norm16 < MIN_YES_YES_WITH_CC) {
138705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            if (!doSpan) {
138805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                qcResult = 1;
138905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            } else {
139005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                return prevBoundary << 1;  // spanYes does not care to know it's "maybe"
139105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            }
139205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        }
139305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        if (src == limit) {
139405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            return (src<<1) | qcResult;  // "yes" or "maybe"
139505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        }
139605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        int prevCC = cc;
139705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        c = Character.codePointAt(s, src);
139805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        norm16 = getNorm16(c);
139905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        if (isMaybeOrNonZeroCC(norm16)) {
140005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            cc = getCCFromYesOrMaybe(norm16);
140105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            if (!(prevCC <= cc || cc == 0)) {
140205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                break;
140305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            }
14042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        } else {
140505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            break;
14062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
140705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        src += Character.charCount(c);
140805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    }
140905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // src is after the last in-order combining mark.
141005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    if (isCompYesAndZeroCC(norm16)) {
141105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        prevBoundary = src;
141205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        src += Character.charCount(c);
141305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        continue;
14142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
14152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
14162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
14172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return prevBoundary<<1;  // "no"
14182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
14192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public void composeAndAppend(CharSequence s,
14212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                 boolean doCompose,
14222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                 boolean onlyContiguous,
14232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                 ReorderingBuffer buffer) {
14242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int src=0, limit=s.length();
14252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(!buffer.isEmpty()) {
142605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int firstStarterInSrc=findNextCompBoundary(s, 0, limit, onlyContiguous);
14272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(0!=firstStarterInSrc) {
14282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int lastStarterInDest=findPreviousCompBoundary(buffer.getStringBuilder(),
142905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                                                               buffer.length(), onlyContiguous);
14302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                StringBuilder middle=new StringBuilder((buffer.length()-lastStarterInDest)+
14312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                       firstStarterInSrc+16);
14322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                middle.append(buffer.getStringBuilder(), lastStarterInDest, buffer.length());
14332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.removeSuffix(buffer.length()-lastStarterInDest);
14342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                middle.append(s, 0, firstStarterInSrc);
14352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                compose(middle, 0, middle.length(), onlyContiguous, true, buffer);
14362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                src=firstStarterInSrc;
14372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
14382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
14392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(doCompose) {
14402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            compose(s, src, limit, onlyContiguous, true, buffer);
14412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
14422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            buffer.append(s, src, limit);
14432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
14442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Dual functionality:
14462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // buffer!=NULL: normalize
14472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes
14482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int makeFCD(CharSequence s, int src, int limit, ReorderingBuffer buffer) {
14492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Note: In this function we use buffer->appendZeroCC() because we track
14502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // the lead and trail combining classes here, rather than leaving it to
14512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // the ReorderingBuffer.
14522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // The exception is the call to decomposeShort() which uses the buffer
14532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // in the normal way.
14542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1.
14562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Similar to the prevBoundary in the compose() implementation.
14572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int prevBoundary=src;
14582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int prevSrc;
14592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int c=0;
14602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int prevFCD16=0;
14612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int fcd16=0;
14622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(;;) {
14642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // count code units with lccc==0
14652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for(prevSrc=src; src!=limit;) {
146605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                if((c=s.charAt(src))<minLcccCP) {
14672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    prevFCD16=~c;
14682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ++src;
14692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if(!singleLeadMightHaveNonZeroFCD16(c)) {
14702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    prevFCD16=0;
14712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ++src;
14722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
14732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(UTF16.isSurrogate((char)c)) {
14742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        char c2;
14752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if(UTF16Plus.isSurrogateLead(c)) {
14762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            if((src+1)!=limit && Character.isLowSurrogate(c2=s.charAt(src+1))) {
14772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                c=Character.toCodePoint((char)c, c2);
14782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            }
14792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        } else /* trail surrogate */ {
14802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            if(prevSrc<src && Character.isHighSurrogate(c2=s.charAt(src-1))) {
14812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                --src;
14822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                c=Character.toCodePoint(c2, (char)c);
14832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            }
14842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
14852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
14862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if((fcd16=getFCD16FromNormData(c))<=0xff) {
14872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        prevFCD16=fcd16;
14882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        src+=Character.charCount(c);
14892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
14902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
14912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
14922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
14932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
14942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // copy these code units all at once
14952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(src!=prevSrc) {
14962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(src==limit) {
14972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(buffer!=null) {
14982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        buffer.flushAndAppendZeroCC(s, prevSrc, src);
14992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
15002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
15012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
15022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                prevBoundary=src;
15032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // We know that the previous character's lccc==0.
15042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(prevFCD16<0) {
150505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    // Fetching the fcd16 value was deferred for this below-minLcccCP code point.
15062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int prev=~prevFCD16;
150705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    if(prev<minDecompNoCP) {
150805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        prevFCD16=0;
150905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                    } else {
151005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        prevFCD16=getFCD16FromNormData(prev);
151105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        if(prevFCD16>1) {
151205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                            --prevBoundary;
151305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        }
15142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
15152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
15162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int p=src-1;
15172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if( Character.isLowSurrogate(s.charAt(p)) && prevSrc<p &&
15182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        Character.isHighSurrogate(s.charAt(p-1))
15192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ) {
15202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        --p;
15212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // Need to fetch the previous character's FCD value because
15222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // prevFCD16 was just for the trail surrogate code point.
15232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        prevFCD16=getFCD16FromNormData(Character.toCodePoint(s.charAt(p), s.charAt(p+1)));
15242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // Still known to have lccc==0 because its lead surrogate unit had lccc==0.
15252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
15262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(prevFCD16>1) {
15272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        prevBoundary=p;
15282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
15292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
15302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(buffer!=null) {
15312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // The last lccc==0 character is excluded from the
15322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // flush-and-append call in case it needs to be modified.
15332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.flushAndAppendZeroCC(s, prevSrc, prevBoundary);
15342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.append(s, prevBoundary, src);
15352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
15362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // The start of the current character (c).
15372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                prevSrc=src;
15382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(src==limit) {
15392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
15402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
15412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
15422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            src+=Character.charCount(c);
15432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // The current character (c) at [prevSrc..src[ has a non-zero lead combining class.
15442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Check for proper order, and decompose locally if necessary.
15452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((prevFCD16&0xff)<=(fcd16>>8)) {
15462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // proper order: prev tccc <= current lccc
15472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if((fcd16&0xff)<=1) {
15482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    prevBoundary=src;
15492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
15502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(buffer!=null) {
15512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    buffer.appendZeroCC(c);
15522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
15532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                prevFCD16=fcd16;
15542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                continue;
15552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(buffer==null) {
15562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return prevBoundary;  // quick check "no"
15572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
15582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                /*
15592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 * Back out the part of the source that we copied or appended
15602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 * already but is now going to be decomposed.
15612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 * prevSrc is set to after what was copied/appended.
15622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 */
15632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.removeSuffix(prevSrc-prevBoundary);
15642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                /*
15652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 * Find the part of the source that needs to be decomposed,
15662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 * up to the next safe boundary.
15672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 */
15682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                src=findNextFCDBoundary(s, src, limit);
15692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                /*
15702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 * The source text does not fulfill the conditions for FCD.
15712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 * Decompose and reorder a limited piece of the text.
15722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                 */
157305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                decomposeShort(s, prevBoundary, src, false, false, buffer);
15742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                prevBoundary=src;
15752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                prevFCD16=0;
15762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
15772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
15782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return src;
15792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
15802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public void makeFCDAndAppend(CharSequence s, boolean doMakeFCD, ReorderingBuffer buffer) {
15812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int src=0, limit=s.length();
15822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(!buffer.isEmpty()) {
15832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int firstBoundaryInSrc=findNextFCDBoundary(s, 0, limit);
15842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(0!=firstBoundaryInSrc) {
15852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int lastBoundaryInDest=findPreviousFCDBoundary(buffer.getStringBuilder(),
15862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                               buffer.length());
15872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                StringBuilder middle=new StringBuilder((buffer.length()-lastBoundaryInDest)+
15882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                       firstBoundaryInSrc+16);
15892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                middle.append(buffer.getStringBuilder(), lastBoundaryInDest, buffer.length());
15902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.removeSuffix(buffer.length()-lastBoundaryInDest);
15912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                middle.append(s, 0, firstBoundaryInSrc);
15922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                makeFCD(middle, 0, middle.length(), buffer);
15932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                src=firstBoundaryInSrc;
15942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
15952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
15962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(doMakeFCD) {
15972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            makeFCD(s, src, limit, buffer);
15982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
15992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            buffer.append(s, src, limit);
16002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
16012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
16022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
160305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public boolean hasDecompBoundaryBefore(int c) {
160405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return c < minLcccCP || (c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c)) ||
160505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            norm16HasDecompBoundaryBefore(getNorm16(c));
160605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    }
160705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public boolean norm16HasDecompBoundaryBefore(int norm16) {
160805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (norm16 < minNoNoCompNoMaybeCC) {
160905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return true;
161005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
161105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (norm16 >= limitNoNo) {
161205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return norm16 <= MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT;
161305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
161405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // c decomposes, get everything from the variable-length extra data
161505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int mapping=norm16>>OFFSET_SHIFT;
161605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int firstUnit=extraData.charAt(mapping);
161705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // true if leadCC==0 (hasFCDBoundaryBefore())
161805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (extraData.charAt(mapping-1)&0xff00)==0;
161905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    }
162005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public boolean hasDecompBoundaryAfter(int c) {
162105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (c < minDecompNoCP) {
162205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return true;
162305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
162405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c)) {
162505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return true;
162605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
162705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return norm16HasDecompBoundaryAfter(getNorm16(c));
162805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    }
162905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public boolean norm16HasDecompBoundaryAfter(int norm16) {
163005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if(norm16 <= minYesNo || isHangulLVT(norm16)) {
163105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return true;
163205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
163305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (norm16 >= limitNoNo) {
163405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (isMaybeOrNonZeroCC(norm16)) {
163505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                return norm16 <= MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT;
16362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
163705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // Maps to an isCompYesAndZeroCC.
163805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1;
163905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
164005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // c decomposes, get everything from the variable-length extra data
164105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int mapping=norm16>>OFFSET_SHIFT;
164205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int firstUnit=extraData.charAt(mapping);
164305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // decomp after-boundary: same as hasFCDBoundaryAfter(),
164405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // fcd16<=1 || trailCC==0
164505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if(firstUnit>0x1ff) {
164605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return false;  // trailCC>1
164705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
164805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if(firstUnit<=0xff) {
164905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return true;  // trailCC==0
16502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
165105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // if(trailCC==1) test leadCC==0, same as checking for before-boundary
165205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // true if leadCC==0 (hasFCDBoundaryBefore())
165305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (extraData.charAt(mapping-1)&0xff00)==0;
16542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
16552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean isDecompInert(int c) { return isDecompYesAndZeroCC(getNorm16(c)); }
16562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
16572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean hasCompBoundaryBefore(int c) {
165805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(getNorm16(c));
16592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
166005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public boolean hasCompBoundaryAfter(int c, boolean onlyContiguous) {
166105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return norm16HasCompBoundaryAfter(getNorm16(c), onlyContiguous);
16622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
166305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public boolean isCompInert(int c, boolean onlyContiguous) {
166405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int norm16=getNorm16(c);
166505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return isCompYesAndZeroCC(norm16) &&
166605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            (norm16 & HAS_COMP_BOUNDARY_AFTER) != 0 &&
166705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            (!onlyContiguous || isInert(norm16) || extraData.charAt(norm16>>OFFSET_SHIFT) <= 0x1ff);
16682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
166905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
167005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public boolean hasFCDBoundaryBefore(int c) { return hasDecompBoundaryBefore(c); }
167105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    public boolean hasFCDBoundaryAfter(int c) { return hasDecompBoundaryAfter(c); }
16722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public boolean isFCDInert(int c) { return getFCD16(c)<=1; }
16732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
16742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private boolean isMaybe(int norm16) { return minMaybeYes<=norm16 && norm16<=JAMO_VT; }
16752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private boolean isMaybeOrNonZeroCC(int norm16) { return norm16>=minMaybeYes; }
167605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private static boolean isInert(int norm16) { return norm16==INERT; }
167705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private static boolean isJamoL(int norm16) { return norm16==JAMO_L; }
16782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static boolean isJamoVT(int norm16) { return norm16==JAMO_VT; }
167905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private int hangulLVT() { return minYesNoMappingsOnly|HAS_COMP_BOUNDARY_AFTER; }
168005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private boolean isHangulLV(int norm16) { return norm16==minYesNo; }
168105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private boolean isHangulLVT(int norm16) {
168205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return norm16==hangulLVT();
168305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    }
16842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private boolean isCompYesAndZeroCC(int norm16) { return norm16<minNoNo; }
16852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // UBool isCompYes(uint16_t norm16) const {
16862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo;
16872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // }
16882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // UBool isCompYesOrMaybe(uint16_t norm16) const {
16892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     return norm16<minNoNo || minMaybeYes<=norm16;
16902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // }
16912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // private boolean hasZeroCCFromDecompYes(int norm16) {
16922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    //     return norm16<=MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
16932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // }
16942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private boolean isDecompYesAndZeroCC(int norm16) {
16952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return norm16<minYesNo ||
16962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller               norm16==JAMO_VT ||
16972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller               (minMaybeYes<=norm16 && norm16<=MIN_NORMAL_MAYBE_YES);
16982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
16992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
17002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * A little faster and simpler than isDecompYesAndZeroCC() but does not include
17012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the MaybeYes which combine-forward and have ccc=0.
170205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert     * (Standard Unicode 10 normalization does not have such characters.)
17032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
17042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private boolean isMostDecompYesAndZeroCC(int norm16) {
17052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return norm16<minYesNo || norm16==MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT;
17062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
17072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private boolean isDecompNoAlgorithmic(int norm16) { return norm16>=limitNoNo; }
17082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // For use with isCompYes().
17102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC.
17112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // static uint8_t getCCFromYes(uint16_t norm16) {
171205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    //     return norm16>=MIN_YES_YES_WITH_CC ? getCCFromNormalYesOrMaybe(norm16) : 0;
17132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // }
17142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int getCCFromNoNo(int norm16) {
171505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int mapping=norm16>>OFFSET_SHIFT;
171605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if((extraData.charAt(mapping)&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
171705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return extraData.charAt(mapping-1)&0xff;
17182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
17192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 0;
17202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
17212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
172205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    int getTrailCCFromCompYesAndZeroCC(int norm16) {
172305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if(norm16<=minYesNo) {
172405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return 0;  // yesYes and Hangul LV have ccc=tccc=0
17252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
172605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // For Hangul LVT we harmlessly fetch a firstUnit with tccc=0 here.
172705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return extraData.charAt(norm16>>OFFSET_SHIFT)>>8;  // tccc from yesNo
17282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
17292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
17302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Requires algorithmic-NoNo.
17322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int mapAlgorithmic(int c, int norm16) {
173305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return c+(norm16>>DELTA_SHIFT)-centerNoNoDelta;
17342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
17352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Requires minYesNo<norm16<limitNoNo.
173705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    // private int getMapping(int norm16) { return extraData+(norm16>>OFFSET_SHIFT); }
17382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
17402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return index into maybeYesCompositions, or -1
17412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
17422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int getCompositionsListForDecompYes(int norm16) {
174305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if(norm16<JAMO_L || MIN_NORMAL_MAYBE_YES<=norm16) {
17442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return -1;
17452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
17462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((norm16-=minMaybeYes)<0) {
17472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // norm16<minMaybeYes: index into extraData which is a substring at
17482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                //     maybeYesCompositions[MIN_NORMAL_MAYBE_YES-minMaybeYes]
17492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // same as (MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16
17502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                norm16+=MIN_NORMAL_MAYBE_YES;  // for yesYes; if Jamo L: harmless empty list
17512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
175205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return norm16>>OFFSET_SHIFT;
17532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
17542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
17552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
17562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return index into maybeYesCompositions
17572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
17582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int getCompositionsListForComposite(int norm16) {
175905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // A composite has both mapping & compositions list.
176005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int list=((MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16)>>OFFSET_SHIFT;
176105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        int firstUnit=maybeYesCompositions.charAt(list);
176205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return list+  // mapping in maybeYesCompositions
176305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            1+  // +1 to skip the first unit with the mapping length
17642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            (firstUnit&MAPPING_LENGTH_MASK);  // + mapping length
17652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
176605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private int getCompositionsListForMaybe(int norm16) {
176705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // minMaybeYes<=norm16<MIN_NORMAL_MAYBE_YES
176805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return (norm16-minMaybeYes)>>OFFSET_SHIFT;
176905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    }
17702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
17712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param c code point must have compositions
17722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return index into maybeYesCompositions
17732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
17742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int getCompositionsList(int norm16) {
17752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return isDecompYes(norm16) ?
17762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                getCompositionsListForDecompYes(norm16) :
17772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                getCompositionsListForComposite(norm16);
17782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
17792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
17802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Decompose a short piece of text which is likely to contain characters that
17812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // fail the quick check loop and/or where the quick check loop's overhead
17822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // is unlikely to be amortized.
17832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Called by the compose() and makeFCD() implementations.
17842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Public in Java for collation implementation code.
178505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private int decomposeShort(
178605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            CharSequence s, int src, int limit,
178705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            boolean stopAtCompBoundary, boolean onlyContiguous,
178805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            ReorderingBuffer buffer) {
17892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(src<limit) {
17902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int c=Character.codePointAt(s, src);
179105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (stopAtCompBoundary && c < minCompNoMaybeCP) {
179205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                return src;
179305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
179405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int norm16 = getNorm16(c);
179505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (stopAtCompBoundary && norm16HasCompBoundaryBefore(norm16)) {
179605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                return src;
179705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
17982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            src+=Character.charCount(c);
179905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            decompose(c, norm16, buffer);
180005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (stopAtCompBoundary && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
180105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                return src;
180205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
18032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
180405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return src;
18052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
180605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private void decompose(int c, int norm16, ReorderingBuffer buffer) {
180705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        // get the decomposition and the lead and trail cc's
180805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (norm16 >= limitNoNo) {
180905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (isMaybeOrNonZeroCC(norm16)) {
18102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                buffer.append(c, getCCFromYesOrMaybe(norm16));
181105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                return;
181205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
181305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // Maps to an isCompYesAndZeroCC.
181405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            c=mapAlgorithmic(c, norm16);
181505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            norm16=getNorm16(c);
181605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
181705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (norm16 < minYesNo) {
181805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // c does not decompose
181905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            buffer.append(c, 0);
182005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        } else if(isHangulLV(norm16) || isHangulLVT(norm16)) {
182105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // Hangul syllable: decompose algorithmically
182205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            Hangul.decompose(c, buffer);
182305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        } else {
182405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // c decomposes, get everything from the variable-length extra data
182505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int mapping=norm16>>OFFSET_SHIFT;
182605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int firstUnit=extraData.charAt(mapping);
182705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int length=firstUnit&MAPPING_LENGTH_MASK;
182805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int leadCC, trailCC;
182905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            trailCC=firstUnit>>8;
183005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) {
183105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                leadCC=extraData.charAt(mapping-1)>>8;
18322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
183305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                leadCC=0;
18342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
183505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            ++mapping;  // skip over the firstUnit
183605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            buffer.append(extraData, mapping, mapping+length, leadCC, trailCC);
18372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
18382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
18392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
18402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
18412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Finds the recomposition result for
18422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * a forward-combining "lead" character,
18432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * specified with a pointer to its compositions list,
18442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * and a backward-combining "trail" character.
18452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
18462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <p>If the lead and trail characters combine, then this function returns
18472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the following "compositeAndFwd" value:
18482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <pre>
18492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Bits 21..1  composite character
18502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Bit      0  set if the composite is a forward-combining starter
18512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * </pre>
18522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * otherwise it returns -1.
18532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
18542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <p>The compositions list has (trail, compositeAndFwd) pair entries,
18552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * encoded as either pairs or triples of 16-bit units.
18562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The last entry has the high bit of its first unit set.
18572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
18582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <p>The list is sorted by ascending trail characters (there are no duplicates).
18592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * A linear search is used.
18602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
18612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * <p>See normalizer2impl.h for a more detailed description
18622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * of the compositions list format.
18632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
18642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static int combine(String compositions, int list, int trail) {
18652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int key1, firstUnit;
18662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(trail<COMP_1_TRAIL_LIMIT) {
18672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // trail character is 0..33FF
18682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // result entry may have 2 or 3 units
18692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            key1=(trail<<1);
18702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while(key1>(firstUnit=compositions.charAt(list))) {
18712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                list+=2+(firstUnit&COMP_1_TRIPLE);
18722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
18732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
18742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if((firstUnit&COMP_1_TRIPLE)!=0) {
1875f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                    return (compositions.charAt(list+1)<<16)|compositions.charAt(list+2);
18762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
18772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return compositions.charAt(list+1);
18782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
18792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
18802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
18812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // trail character is 3400..10FFFF
18822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // result entry has 3 units
18832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            key1=COMP_1_TRAIL_LIMIT+(((trail>>COMP_1_TRAIL_SHIFT))&~COMP_1_TRIPLE);
18842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int key2=(trail<<COMP_2_TRAIL_SHIFT)&0xffff;
18852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int secondUnit;
18862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for(;;) {
18872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(key1>(firstUnit=compositions.charAt(list))) {
18882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    list+=2+(firstUnit&COMP_1_TRIPLE);
18892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if(key1==(firstUnit&COMP_1_TRAIL_MASK)) {
18902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(key2>(secondUnit=compositions.charAt(list+1))) {
18912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if((firstUnit&COMP_1_LAST_TUPLE)!=0) {
18922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            break;
18932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        } else {
18942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            list+=3;
18952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
18962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else if(key2==(secondUnit&COMP_2_TRAIL_MASK)) {
18972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        return ((secondUnit&~COMP_2_TRAIL_MASK)<<16)|compositions.charAt(list+2);
18982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
18992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
19002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
19012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
19022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
19032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
19042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
19052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
19062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return -1;
19072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
19082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
19092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param list some character's compositions list
19102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @param set recursively receives the composites from these compositions
19112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
19122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void addComposites(int list, UnicodeSet set) {
19132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int firstUnit, compositeAndFwd;
19142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        do {
19152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            firstUnit=maybeYesCompositions.charAt(list);
19162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((firstUnit&COMP_1_TRIPLE)==0) {
19172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                compositeAndFwd=maybeYesCompositions.charAt(list+1);
19182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                list+=2;
19192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
1920f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert                compositeAndFwd=((maybeYesCompositions.charAt(list+1)&~COMP_2_TRAIL_MASK)<<16)|
19212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                maybeYesCompositions.charAt(list+2);
19222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                list+=3;
19232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
19242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int composite=compositeAndFwd>>1;
19252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((compositeAndFwd&1)!=0) {
19262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                addComposites(getCompositionsListForComposite(getNorm16(composite)), set);
19272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
19282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            set.add(composite);
19292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } while((firstUnit&COMP_1_LAST_TUPLE)==0);
19302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
19312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /*
19322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Recomposes the buffer text starting at recomposeStartIndex
19332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * (which is in NFD - decomposed and canonically ordered),
19342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * and truncates the buffer contents.
19352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
19362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Note that recomposition never lengthens the text:
19372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Any character consists of either one or two code units;
19382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * a composition may contain at most one more code unit than the original starter,
19392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * while the combining mark that is removed has at least one code unit.
19402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
19412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void recompose(ReorderingBuffer buffer, int recomposeStartIndex,
19422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                           boolean onlyContiguous) {
19432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder sb=buffer.getStringBuilder();
19442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int p=recomposeStartIndex;
19452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(p==sb.length()) {
19462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return;
19472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
19482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
19492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int starter, pRemove;
19502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int compositionsList;
19512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int c, compositeAndFwd;
19522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int norm16;
19532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int cc, prevCC;
19542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean starterIsSupplementary;
19552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
19562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Some of the following variables are not used until we have a forward-combining starter
19572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // and are only initialized now to avoid compiler warnings.
19582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        compositionsList=-1;  // used as indicator for whether we have a forward-combining starter
19592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        starter=-1;
19602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        starterIsSupplementary=false;
19612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        prevCC=0;
19622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
19632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(;;) {
19642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            c=sb.codePointAt(p);
19652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            p+=Character.charCount(c);
19662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            norm16=getNorm16(c);
19672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            cc=getCCFromYesOrMaybe(norm16);
19682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if( // this character combines backward and
19692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                isMaybe(norm16) &&
19702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // we have seen a starter that combines forward and
19712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                compositionsList>=0 &&
19722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // the backward-combining character is not blocked
19732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                (prevCC<cc || prevCC==0)
19742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ) {
19752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(isJamoVT(norm16)) {
19762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // c is a Jamo V/T, see if we can compose it with the previous character.
19772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(c<Hangul.JAMO_T_BASE) {
19782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T.
19792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        char prev=(char)(sb.charAt(starter)-Hangul.JAMO_L_BASE);
19802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if(prev<Hangul.JAMO_L_COUNT) {
19812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            pRemove=p-1;
19822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            char syllable=(char)
19832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                (Hangul.HANGUL_BASE+
19842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                 (prev*Hangul.JAMO_V_COUNT+(c-Hangul.JAMO_V_BASE))*
19852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                 Hangul.JAMO_T_COUNT);
19862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            char t;
19872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            if(p!=sb.length() && (t=(char)(sb.charAt(p)-Hangul.JAMO_T_BASE))<Hangul.JAMO_T_COUNT) {
19882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                ++p;
19892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                syllable+=t;  // The next character was a Jamo T.
19902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            }
19912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            sb.setCharAt(starter, syllable);
19922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // remove the Jamo V/T
19932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            sb.delete(pRemove, p);
19942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            p=pRemove;
19952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
19962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
19972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    /*
19982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                     * No "else" for Jamo T:
19992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                     * Since the input is in NFD, there are no Hangul LV syllables that
20002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                     * a Jamo T could combine with.
20012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                     * All Jamo Ts are combined above when handling Jamo Vs.
20022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                     */
20032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(p==sb.length()) {
20042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
20052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
20062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    compositionsList=-1;
20072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    continue;
20082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else if((compositeAndFwd=combine(maybeYesCompositions, compositionsList, c))>=0) {
20092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // The starter and the combining mark (c) do combine.
20102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int composite=compositeAndFwd>>1;
20112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
20122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Remove the combining mark.
20132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    pRemove=p-Character.charCount(c);  // pRemove & p: start & limit of the combining mark
20142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    sb.delete(pRemove, p);
20152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    p=pRemove;
20162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Replace the starter with the composite.
20172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(starterIsSupplementary) {
20182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if(composite>0xffff) {
20192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // both are supplementary
20202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            sb.setCharAt(starter, UTF16.getLeadSurrogate(composite));
20212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            sb.setCharAt(starter+1, UTF16.getTrailSurrogate(composite));
20222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        } else {
20232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            sb.setCharAt(starter, (char)c);
20242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            sb.deleteCharAt(starter+1);
20252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // The composite is shorter than the starter,
20262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            // move the intermediate characters forward one.
20272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            starterIsSupplementary=false;
20282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            --p;
20292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
20302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else if(composite>0xffff) {
20312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // The composite is longer than the starter,
20322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // move the intermediate characters back one.
20332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        starterIsSupplementary=true;
20342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        sb.setCharAt(starter, UTF16.getLeadSurrogate(composite));
20352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        sb.insert(starter+1, UTF16.getTrailSurrogate(composite));
20362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        ++p;
20372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
20382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // both are on the BMP
20392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        sb.setCharAt(starter, (char)composite);
20402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
20412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
20422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Keep prevCC because we removed the combining mark.
20432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
20442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(p==sb.length()) {
20452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
20462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
20472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Is the composite a starter that combines forward?
20482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if((compositeAndFwd&1)!=0) {
20492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        compositionsList=
20502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            getCompositionsListForComposite(getNorm16(composite));
20512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
20522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        compositionsList=-1;
20532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
20542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
20552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // We combined; continue with looking for compositions.
20562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    continue;
20572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
20582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
20592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
20602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // no combination this time
20612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            prevCC=cc;
20622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(p==sb.length()) {
20632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
20642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
20652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
20662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // If c did not combine, then check if it is a starter.
20672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(cc==0) {
20682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Found a new starter.
20692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if((compositionsList=getCompositionsListForDecompYes(norm16))>=0) {
20702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // It may combine with something, prepare for it.
20712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(c<=0xffff) {
20722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        starterIsSupplementary=false;
20732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        starter=p-1;
20742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
20752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        starterIsSupplementary=true;
20762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        starter=p-2;
20772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
20782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
20792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(onlyContiguous) {
20802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // FCC: no discontiguous compositions; any intervening character blocks.
20812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                compositionsList=-1;
20822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
20832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
20842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        buffer.flush();
20852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
20862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
20872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public int composePair(int a, int b) {
20882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int norm16=getNorm16(a);  // maps an out-of-range 'a' to inert norm16=0
20892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int list;
20902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(isInert(norm16)) {
20912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return -1;
20922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if(norm16<minYesNoMappingsOnly) {
209305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            // a combines forward.
20942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(isJamoL(norm16)) {
20952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                b-=Hangul.JAMO_V_BASE;
20962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(0<=b && b<Hangul.JAMO_V_COUNT) {
20972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return
20982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        (Hangul.HANGUL_BASE+
20992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                         ((a-Hangul.JAMO_L_BASE)*Hangul.JAMO_V_COUNT+b)*
21002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                         Hangul.JAMO_T_COUNT);
21012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
21022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return -1;
21032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
210405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            } else if(isHangulLV(norm16)) {
21052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                b-=Hangul.JAMO_T_BASE;
210605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                if(0<b && b<Hangul.JAMO_T_COUNT) {  // not b==0!
21072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return a+b;
21082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
21092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return -1;
21102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
21112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
21122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // 'a' has a compositions list in extraData
211305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                list=((MIN_NORMAL_MAYBE_YES-minMaybeYes)+norm16)>>OFFSET_SHIFT;
21142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(norm16>minYesNo) {  // composite 'a' has both mapping & compositions list
21152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    list+=  // mapping pointer
211605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        1+  // +1 to skip the first unit with the mapping length
211705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                        (maybeYesCompositions.charAt(list)&MAPPING_LENGTH_MASK);  // + mapping length
21182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
21192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
21202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if(norm16<minMaybeYes || MIN_NORMAL_MAYBE_YES<=norm16) {
21212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return -1;
21222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
212305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            list=getCompositionsListForMaybe(norm16);  // offset into maybeYesCompositions
21242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
21252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(b<0 || 0x10ffff<b) {  // combine(list, b) requires a valid code point b
21262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return -1;
21272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
21282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return combine(maybeYesCompositions, list, b)>>1;
21292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
21302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
21312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
21322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Does c have a composition boundary before it?
21332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * True if its decomposition begins with a character that has
21342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * ccc=0 && NFC_QC=Yes (isCompYesAndZeroCC()).
21352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * As a shortcut, this is true if c itself has ccc=0 && NFC_QC=Yes
21362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * (isCompYesAndZeroCC()) so we need not decompose.
21372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
21382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private boolean hasCompBoundaryBefore(int c, int norm16) {
213905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(norm16);
214005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    }
214105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private boolean norm16HasCompBoundaryBefore(int norm16) {
214205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return norm16 < minNoNoCompNoMaybeCC || isAlgorithmicNoNo(norm16);
214305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    }
214405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private boolean hasCompBoundaryBefore(CharSequence s, int src, int limit) {
214505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return src == limit || hasCompBoundaryBefore(Character.codePointAt(s, src));
21462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
214705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private boolean norm16HasCompBoundaryAfter(int norm16, boolean onlyContiguous) {
214805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return (norm16 & HAS_COMP_BOUNDARY_AFTER) != 0 &&
214905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            (!onlyContiguous || isTrailCC01ForCompBoundaryAfter(norm16));
215005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    }
215105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private boolean hasCompBoundaryAfter(CharSequence s, int start, int p, boolean onlyContiguous) {
215205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return start == p || hasCompBoundaryAfter(Character.codePointBefore(s, p), onlyContiguous);
215305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    }
215405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    /** For FCC: Given norm16 HAS_COMP_BOUNDARY_AFTER, does it have tccc<=1? */
215505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private boolean isTrailCC01ForCompBoundaryAfter(int norm16) {
215605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return isInert(norm16) || (isDecompNoAlgorithmic(norm16) ?
215705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1 : extraData.charAt(norm16 >> OFFSET_SHIFT) <= 0x1ff);
215805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    }
215905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
216005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private int findPreviousCompBoundary(CharSequence s, int p, boolean onlyContiguous) {
21612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(p>0) {
21622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int c=Character.codePointBefore(s, p);
216305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int norm16 = getNorm16(c);
216405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
216505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                break;
216605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
21672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            p-=Character.charCount(c);
216805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if(hasCompBoundaryBefore(c, norm16)) {
21692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
21702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
21712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
21722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return p;
21732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
217405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private int findNextCompBoundary(CharSequence s, int p, int limit, boolean onlyContiguous) {
21752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(p<limit) {
21762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int c=Character.codePointAt(s, p);
21772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int norm16=normTrie.get(c);
21782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(hasCompBoundaryBefore(c, norm16)) {
21792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
21802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
21812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            p+=Character.charCount(c);
218205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) {
218305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                break;
218405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
21852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
21862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return p;
21872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
21882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
21892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int findPreviousFCDBoundary(CharSequence s, int p) {
21902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(p>0) {
21912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int c=Character.codePointBefore(s, p);
219205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int norm16;
219305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (c < minDecompNoCP || norm16HasDecompBoundaryAfter(norm16 = getNorm16(c))) {
219405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                break;
219505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
21962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            p-=Character.charCount(c);
219705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (norm16HasDecompBoundaryBefore(norm16)) {
21982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
21992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
22002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
22012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return p;
22022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
22032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int findNextFCDBoundary(CharSequence s, int p, int limit) {
22042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(p<limit) {
22052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int c=Character.codePointAt(s, p);
220605fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            int norm16;
220705fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (c < minLcccCP || norm16HasDecompBoundaryBefore(norm16 = getNorm16(c))) {
22082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
22092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
22102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            p+=Character.charCount(c);
221105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            if (norm16HasDecompBoundaryAfter(norm16)) {
221205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert                break;
221305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            }
22142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
22152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return p;
22162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
22172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
221805fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private int getPreviousTrailCC(CharSequence s, int start, int p) {
221905fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        if (start == p) {
222005fa7802d0874812c234a29745586677ee5837eaFredrik Roubert            return 0;
222105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        }
222205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert        return getFCD16(Character.codePointBefore(s, p));
222305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    }
222405fa7802d0874812c234a29745586677ee5837eaFredrik Roubert
22252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void addToStartSet(Trie2Writable newData, int origin, int decompLead) {
22262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int canonValue=newData.get(decompLead);
22272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if((canonValue&(CANON_HAS_SET|CANON_VALUE_MASK))==0 && origin!=0) {
22282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // origin is the first character whose decomposition starts with
22292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // the character for which we are setting the value.
22302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            newData.set(decompLead, canonValue|origin);
22312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
22322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // origin is not the first character, or it is U+0000.
22332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            UnicodeSet set;
22342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((canonValue&CANON_HAS_SET)==0) {
22352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int firstOrigin=canonValue&CANON_VALUE_MASK;
22362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                canonValue=(canonValue&~CANON_VALUE_MASK)|CANON_HAS_SET|canonStartSets.size();
22372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                newData.set(decompLead, canonValue);
22382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                canonStartSets.add(set=new UnicodeSet());
22392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(firstOrigin!=0) {
22402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    set.add(firstOrigin);
22412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
22422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
22432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                set=canonStartSets.get(canonValue&CANON_VALUE_MASK);
22442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
22452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            set.add(origin);
22462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
22472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
22482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
22492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    @SuppressWarnings("unused")
22502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private VersionInfo dataVersion;
22512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
225205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    // BMP code point thresholds for quick check loops looking at single UTF-16 code units.
22532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int minDecompNoCP;
22542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int minCompNoMaybeCP;
225505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private int minLcccCP;
22562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
22572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // Norm16 value thresholds for quick check combinations and types of extra data.
22582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int minYesNo;
22592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int minYesNoMappingsOnly;
22602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int minNoNo;
226105fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private int minNoNoCompBoundaryBefore;
226205fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private int minNoNoCompNoMaybeCC;
226305fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private int minNoNoEmpty;
22642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int limitNoNo;
226505fa7802d0874812c234a29745586677ee5837eaFredrik Roubert    private int centerNoNoDelta;
22662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int minMaybeYes;
22672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
22682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private Trie2_16 normTrie;
22692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private String maybeYesCompositions;
22702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private String extraData;  // mappings and/or compositions for yesYes, yesNo & noNo characters
22712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private byte[] smallFCD;  // [0x100] one bit per 32 BMP code points, set if any FCD!=0
22722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
22732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private Trie2_32 canonIterData;
22742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private ArrayList<UnicodeSet> canonStartSets;
22752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
22762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    // bits in canonIterData
22772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int CANON_NOT_SEGMENT_STARTER = 0x80000000;
22782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int CANON_HAS_COMPOSITIONS = 0x40000000;
22792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int CANON_HAS_SET = 0x200000;
22802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int CANON_VALUE_MASK = 0x1fffff;
22812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller}
2282