12ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/* GENERATED SOURCE. DO NOT MODIFY. */
2f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// © 2016 and later: Unicode, Inc. and others.
3f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License
42ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller/*
52ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*******************************************************************************
62ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* Copyright (C) 2013-2015, International Business Machines
72ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* Corporation and others.  All Rights Reserved.
82ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*******************************************************************************
92ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* CollationBuilder.java, ported from collationbuilder.h/.cpp
102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*
112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* C++ version created on: 2013may06
122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller* created by: Markus W. Scherer
132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller*/
142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpackage android.icu.impl.coll;
162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport java.text.ParseException;
182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.Norm2AllModes;
202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.Normalizer2Impl;
212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.impl.Normalizer2Impl.Hangul;
222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.lang.UScript;
232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.CanonicalIterator;
242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.Collator;
252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.Normalizer2;
262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UnicodeSet;
272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.text.UnicodeSetIterator;
282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerimport android.icu.util.ULocale;
292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
301537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller/**
311537b2f39245c07b00aa78c3600f7aebcb172490Neil Fuller * @hide Only a subset of ICU is exposed in Android
32836e6b40a94ec3fb7545a76cb072960442b7eee9Neil Fuller */
332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fullerpublic final class CollationBuilder extends CollationRuleParser.Sink {
342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final boolean DEBUG = false;
352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final class BundleImporter implements CollationRuleParser.Importer {
362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        BundleImporter() {}
37f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        @Override
382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public String getRules(String localeID, String collationType) {
392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return CollationLoader.loadRules(new ULocale(localeID), collationType);
402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public CollationBuilder(CollationTailoring b) {
442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        nfd = Normalizer2.getNFDInstance();
452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fcd = Norm2AllModes.getFCDNormalizer2();
462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        nfcImpl = Norm2AllModes.getNFCInstance().impl;
472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        base = b;
482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        baseData = b.data;
492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        rootElements = new CollationRootElements(b.data.rootElements);
502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        variableTop = 0;
512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        dataBuilder = new CollationDataBuilder();
522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        fastLatinEnabled = true;
532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        cesLength = 0;
542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        rootPrimaryIndexes = new UVector32();
552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        nodes = new UVector64();
562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        nfcImpl.ensureCanonIterData();
572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        dataBuilder.initForTailoring(baseData);
582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    public CollationTailoring parseAndBuild(String ruleString) throws ParseException {
612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(baseData.rootElements == null) {
622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // C++ U_MISSING_RESOURCE_ERROR
632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new UnsupportedOperationException(
642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    "missing root elements data, tailoring not supported");
652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        CollationTailoring tailoring = new CollationTailoring(base.settings);
672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        CollationRuleParser parser = new CollationRuleParser(baseData);
682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Note: This always bases &[last variable] and &[first regular]
692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // on the root collator's maxVariable/variableTop.
702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // If we wanted this to change after [maxVariable x], then we would keep
712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // the tailoring.settings pointer here and read its variableTop when we need it.
722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // See http://unicode.org/cldr/trac/ticket/6070
732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        variableTop = base.settings.readOnly().variableTop;
742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        parser.setSink(this);
752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // In Java, there is only one Importer implementation.
762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // In C++, the importer is a parameter for this method.
772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        parser.setImporter(new BundleImporter());
782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        CollationSettings ownedSettings = tailoring.settings.copyOnWrite();
792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        parser.parse(ruleString, ownedSettings);
802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(dataBuilder.hasMappings()) {
812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            makeTailoredCEs();
822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            closeOverComposites();
832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            finalizeCEs();
842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Copy all of ASCII, and Latin-1 letters, into each tailoring.
852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            optimizeSet.add(0, 0x7f);
862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            optimizeSet.add(0xc0, 0xff);
872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Hangul is decomposed on the fly during collation,
882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // and the tailoring data is always built with HANGUL_TAG specials.
892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            optimizeSet.remove(Hangul.HANGUL_BASE, Hangul.HANGUL_END);
902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            dataBuilder.optimize(optimizeSet);
912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            tailoring.ensureOwnedData();
922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(fastLatinEnabled) { dataBuilder.enableFastLatin(); }
932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            dataBuilder.build(tailoring.ownedData);
942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // C++ tailoring.builder = dataBuilder;
952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            dataBuilder = null;
962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            tailoring.data = baseData;
982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ownedSettings.fastLatinOptions = CollationFastLatin.getOptions(
1002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                tailoring.data, ownedSettings,
1012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ownedSettings.fastLatinPrimaries);
1022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        tailoring.setRules(ruleString);
1032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // In Java, we do not have a rules version.
1042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // In C++, the genrb build tool reads and supplies one,
1052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // and the rulesVersion is a parameter for this method.
1062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        tailoring.setVersion(base.version, 0 /* rulesVersion */);
1072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return tailoring;
1082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
1092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /** Implements CollationRuleParser.Sink. */
1112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    @Override
1122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    void addReset(int strength, CharSequence str) {
1132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert(str.length() != 0);
1142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(str.charAt(0) == CollationRuleParser.POS_LEAD) {
1152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ces[0] = getSpecialResetPosition(str);
1162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            cesLength = 1;
1172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            assert((ces[0] & Collation.CASE_AND_QUATERNARY_MASK) == 0);
1182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
1192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // normal reset to a character or string
1202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            String nfdString = nfd.normalize(str);
1212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            cesLength = dataBuilder.getCEs(nfdString, ces, 0);
1222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(cesLength > Collation.MAX_EXPANSION_LENGTH) {
1232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new IllegalArgumentException(
1242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        "reset position maps to too many collation elements (more than 31)");
1252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(strength == Collator.IDENTICAL) { return; }  // simple reset-at-position
1282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // &[before strength]position
1302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert(Collator.PRIMARY <= strength && strength <= Collator.TERTIARY);
1312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int index = findOrInsertNodeForCEs(strength);
1322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        long node = nodes.elementAti(index);
1342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // If the index is for a "weaker" node,
1352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // then skip backwards over this and further "weaker" nodes.
1362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(strengthFromNode(node) > strength) {
1372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            index = previousIndexFromNode(node);
1382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            node = nodes.elementAti(index);
1392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
1412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Find or insert a node whose index we will put into a temporary CE.
1422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(strengthFromNode(node) == strength && isTailoredNode(node)) {
1432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Reset to just before this same-strength tailored node.
1442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            index = previousIndexFromNode(node);
1452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if(strength == Collator.PRIMARY) {
1462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // root primary node (has no previous index)
1472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            long p = weight32FromNode(node);
1482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(p == 0) {
1492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new UnsupportedOperationException(
1502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        "reset primary-before ignorable not possible");
1512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(p <= rootElements.getFirstPrimary()) {
1532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // There is no primary gap between ignorables and the space-first-primary.
1542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new UnsupportedOperationException(
1552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        "reset primary-before first non-ignorable not supported");
1562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(p == Collation.FIRST_TRAILING_PRIMARY) {
1582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // We do not support tailoring to an unassigned-implicit CE.
1592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new UnsupportedOperationException(
1602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        "reset primary-before [first trailing] not supported");
1612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            p = rootElements.getPrimaryBefore(p, baseData.isCompressiblePrimary(p));
1632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            index = findOrInsertNodeForPrimary(p);
1642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Go to the last node in this list:
1652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Tailor after the last node between adjacent root nodes.
1662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for(;;) {
1672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                node = nodes.elementAti(index);
1682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int nextIndex = nextIndexFromNode(node);
1692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(nextIndex == 0) { break; }
1702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                index = nextIndex;
1712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
1732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // &[before 2] or &[before 3]
1742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            index = findCommonNode(index, Collator.SECONDARY);
1752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(strength >= Collator.TERTIARY) {
1762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                index = findCommonNode(index, Collator.TERTIARY);
1772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
1782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // findCommonNode() stayed on the stronger node or moved to
1792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // an explicit common-weight node of the reset-before strength.
1802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            node = nodes.elementAti(index);
1812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(strengthFromNode(node) == strength) {
1822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Found a same-strength node with an explicit weight.
1832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int weight16 = weight16FromNode(node);
1842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(weight16 == 0) {
1852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    throw new UnsupportedOperationException(
1862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            (strength == Collator.SECONDARY) ?
1872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    "reset secondary-before secondary ignorable not possible" :
1882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    "reset tertiary-before completely ignorable not possible");
1892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
1902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                assert(weight16 > Collation.BEFORE_WEIGHT16);
1912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Reset to just before this node.
1922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Insert the preceding same-level explicit weight if it is not there already.
1932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Which explicit weight immediately precedes this one?
1942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                weight16 = getWeight16Before(index, node, strength);
1952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Does this preceding weight have a node?
1962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int previousWeight16;
1972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int previousIndex = previousIndexFromNode(node);
1982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                for(int i = previousIndex;; i = previousIndexFromNode(node)) {
1992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    node = nodes.elementAti(i);
2002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int previousStrength = strengthFromNode(node);
2012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(previousStrength < strength) {
2022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        assert(weight16 >= Collation.COMMON_WEIGHT16 || i == previousIndex);
2032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // Either the reset element has an above-common weight and
2042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // the parent node provides the implied common weight,
2052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // or the reset element has a weight<=common in the node
2062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // right after the parent, and we need to insert the preceding weight.
2072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        previousWeight16 = Collation.COMMON_WEIGHT16;
2082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
2092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else if(previousStrength == strength && !isTailoredNode(node)) {
2102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        previousWeight16 = weight16FromNode(node);
2112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
2122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
2132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Skip weaker nodes and same-level tailored nodes.
2142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
2152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(previousWeight16 == weight16) {
2162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // The preceding weight has a node,
2172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // maybe with following weaker or tailored nodes.
2182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Reset to the last of them.
2192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    index = previousIndex;
2202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
2212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Insert a node with the preceding weight, reset to that.
2222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    node = nodeFromWeight16(weight16) | nodeFromStrength(strength);
2232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    index = insertNodeBetween(previousIndex, index, node);
2242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
2252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
2262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Found a stronger node with implied strength-common weight.
2272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int weight16 = getWeight16Before(index, node, strength);
2282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                index = findOrInsertWeakNode(index, weight16, strength);
2292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
2302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Strength of the temporary CE = strength of its reset position.
2312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Code above raises an error if the before-strength is stronger.
2322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            strength = ceStrength(ces[cesLength - 1]);
2332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ces[cesLength - 1] = tempCEFromIndexAndStrength(index, strength);
2352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
2362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
2382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Returns the secondary or tertiary weight preceding the current node's weight.
2392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * node=nodes[index].
2402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
2412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int getWeight16Before(int index, long node, int level) {
2422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert(strengthFromNode(node) < level || !isTailoredNode(node));
2432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Collect the root CE weights if this node is for a root CE.
2442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // If it is not, then return the low non-primary boundary for a tailored CE.
2452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int t;
2462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(strengthFromNode(node) == Collator.TERTIARY) {
2472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            t = weight16FromNode(node);
2482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
2492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            t = Collation.COMMON_WEIGHT16;  // Stronger node with implied common weight.
2502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(strengthFromNode(node) > Collator.SECONDARY) {
2522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            index = previousIndexFromNode(node);
2532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            node = nodes.elementAti(index);
2542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(isTailoredNode(node)) {
2562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return Collation.BEFORE_WEIGHT16;
2572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int s;
2592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(strengthFromNode(node) == Collator.SECONDARY) {
2602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            s = weight16FromNode(node);
2612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
2622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            s = Collation.COMMON_WEIGHT16;  // Stronger node with implied common weight.
2632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(strengthFromNode(node) > Collator.PRIMARY) {
2652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            index = previousIndexFromNode(node);
2662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            node = nodes.elementAti(index);
2672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(isTailoredNode(node)) {
2692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return Collation.BEFORE_WEIGHT16;
2702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // [p, s, t] is a root CE. Return the preceding weight for the requested level.
2722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        long p = weight32FromNode(node);
2732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int weight16;
2742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(level == Collator.SECONDARY) {
2752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            weight16 = rootElements.getSecondaryBefore(p, s);
2762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
2772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            weight16 = rootElements.getTertiaryBefore(p, s, t);
2782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            assert((weight16 & ~Collation.ONLY_TERTIARY_MASK) == 0);
2792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
2802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return weight16;
2812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
2822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
2832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private long getSpecialResetPosition(CharSequence str) {
2842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert(str.length() == 2);
2852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        long ce;
2862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int strength = Collator.PRIMARY;
2872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        boolean isBoundary = false;
2882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        CollationRuleParser.Position pos =
2892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                CollationRuleParser.POSITION_VALUES[str.charAt(1) - CollationRuleParser.POS_BASE];
2902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        switch(pos) {
2912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case FIRST_TERTIARY_IGNORABLE:
2922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Quaternary CEs are not supported.
2932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Non-zero quaternary weights are possible only on tertiary or stronger CEs.
2942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 0;
2952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case LAST_TERTIARY_IGNORABLE:
2962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 0;
2972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case FIRST_SECONDARY_IGNORABLE: {
2982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Look for a tailored tertiary node after [0, 0, 0].
2992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int index = findOrInsertNodeForRootCE(0, Collator.TERTIARY);
3002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            long node = nodes.elementAti(index);
3012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((index = nextIndexFromNode(node)) != 0) {
3022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                node = nodes.elementAti(index);
3032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                assert(strengthFromNode(node) <= Collator.TERTIARY);
3042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(isTailoredNode(node) && strengthFromNode(node) == Collator.TERTIARY) {
3052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return tempCEFromIndexAndStrength(index, Collator.TERTIARY);
3062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return rootElements.getFirstTertiaryCE();
3092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // No need to look for nodeHasAnyBefore() on a tertiary node.
3102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case LAST_SECONDARY_IGNORABLE:
3122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ce = rootElements.getLastTertiaryCE();
3132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            strength = Collator.TERTIARY;
3142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
3152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case FIRST_PRIMARY_IGNORABLE: {
3162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Look for a tailored secondary node after [0, 0, *].
3172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int index = findOrInsertNodeForRootCE(0, Collator.SECONDARY);
3182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            long node = nodes.elementAti(index);
3192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while((index = nextIndexFromNode(node)) != 0) {
3202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                node = nodes.elementAti(index);
3212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                strength = strengthFromNode(node);
3222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(strength < Collator.SECONDARY) { break; }
3232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(strength == Collator.SECONDARY) {
3242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(isTailoredNode(node)) {
3252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if(nodeHasBefore3(node)) {
3262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            index = nextIndexFromNode(nodes.elementAti(nextIndexFromNode(node)));
3272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            assert(isTailoredNode(nodes.elementAti(index)));
3282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
3292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        return tempCEFromIndexAndStrength(index, Collator.SECONDARY);
3302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
3312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
3322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
3332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
3342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
3352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ce = rootElements.getFirstSecondaryCE();
3362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            strength = Collator.SECONDARY;
3372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
3382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case LAST_PRIMARY_IGNORABLE:
3402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ce = rootElements.getLastSecondaryCE();
3412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            strength = Collator.SECONDARY;
3422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
3432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case FIRST_VARIABLE:
3442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ce = rootElements.getFirstPrimaryCE();
3452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            isBoundary = true;  // FractionalUCA.txt: FDD1 00A0, SPACE first primary
3462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
3472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case LAST_VARIABLE:
3482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ce = rootElements.lastCEWithPrimaryBefore(variableTop + 1);
3492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
3502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case FIRST_REGULAR:
3512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ce = rootElements.firstCEWithPrimaryAtLeast(variableTop + 1);
3522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            isBoundary = true;  // FractionalUCA.txt: FDD1 263A, SYMBOL first primary
3532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
3542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case LAST_REGULAR:
3552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Use the Hani-first-primary rather than the actual last "regular" CE before it,
3562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // for backward compatibility with behavior before the introduction of
3572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // script-first-primary CEs in the root collator.
3582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ce = rootElements.firstCEWithPrimaryAtLeast(
3592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                baseData.getFirstPrimaryForGroup(UScript.HAN));
3602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
3612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case FIRST_IMPLICIT:
3622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ce = baseData.getSingleCE(0x4e00);
3632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
3642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case LAST_IMPLICIT:
3652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // We do not support tailoring to an unassigned-implicit CE.
3662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new UnsupportedOperationException(
3672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    "reset to [last implicit] not supported");
3682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case FIRST_TRAILING:
3692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ce = Collation.makeCE(Collation.FIRST_TRAILING_PRIMARY);
3702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            isBoundary = true;  // trailing first primary (there is no mapping for it)
3712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            break;
3722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        case LAST_TRAILING:
3732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new IllegalArgumentException("LDML forbids tailoring to U+FFFF");
3742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        default:
3752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            assert(false);
3762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return 0;
3772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
3782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
3792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int index = findOrInsertNodeForRootCE(ce, strength);
3802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        long node = nodes.elementAti(index);
3812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if((pos.ordinal() & 1) == 0) {
3822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // even pos = [first xyz]
3832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(!nodeHasAnyBefore(node) && isBoundary) {
3842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // A <group> first primary boundary is artificially added to FractionalUCA.txt.
3852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // It is reachable via its special contraction, but is not normally used.
3862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Find the first character tailored after the boundary CE,
3872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // or the first real root CE after it.
3882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if((index = nextIndexFromNode(node)) != 0) {
3892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // If there is a following node, then it must be tailored
3902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // because there are no root CEs with a boundary primary
3912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // and non-common secondary/tertiary weights.
3922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    node = nodes.elementAti(index);
3932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    assert(isTailoredNode(node));
3942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ce = tempCEFromIndexAndStrength(index, strength);
3952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
3962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    assert(strength == Collator.PRIMARY);
3972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    long p = ce >>> 32;
3982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int pIndex = rootElements.findPrimary(p);
3992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    boolean isCompressible = baseData.isCompressiblePrimary(p);
4002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    p = rootElements.getPrimaryAfter(p, pIndex, isCompressible);
4012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ce = Collation.makeCE(p);
4022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    index = findOrInsertNodeForRootCE(ce, Collator.PRIMARY);
4032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    node = nodes.elementAti(index);
4042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
4052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(nodeHasAnyBefore(node)) {
4072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Get the first node that was tailored before this one at a weaker strength.
4082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(nodeHasBefore2(node)) {
4092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    index = nextIndexFromNode(nodes.elementAti(nextIndexFromNode(node)));
4102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    node = nodes.elementAti(index);
4112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
4122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(nodeHasBefore3(node)) {
4132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    index = nextIndexFromNode(nodes.elementAti(nextIndexFromNode(node)));
4142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
4152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                assert(isTailoredNode(nodes.elementAti(index)));
4162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ce = tempCEFromIndexAndStrength(index, strength);
4172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
4192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // odd pos = [last xyz]
4202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Find the last node that was tailored after the [last xyz]
4212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // at a strength no greater than the position's strength.
4222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for(;;) {
4232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int nextIndex = nextIndexFromNode(node);
4242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(nextIndex == 0) { break; }
4252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                long nextNode = nodes.elementAti(nextIndex);
4262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(strengthFromNode(nextNode) < strength) { break; }
4272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                index = nextIndex;
4282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                node = nextNode;
4292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Do not make a temporary CE for a root node.
4312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // This last node might be the node for the root CE itself,
4322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // or a node with a common secondary or tertiary weight.
4332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(isTailoredNode(node)) {
4342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ce = tempCEFromIndexAndStrength(index, strength);
4352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return ce;
4382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
4392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /** Implements CollationRuleParser.Sink. */
441f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert    @Override
4422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    void addRelation(int strength, CharSequence prefix, CharSequence str, CharSequence extension) {
4432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        String nfdPrefix;
4442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(prefix.length() == 0) {
4452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            nfdPrefix = "";
4462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
4472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            nfdPrefix = nfd.normalize(prefix);
4482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        String nfdString = nfd.normalize(str);
4502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // The runtime code decomposes Hangul syllables on the fly,
4522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // with recursive processing but without making the Jamo pieces visible for matching.
4532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // It does not work with certain types of contextual mappings.
4542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int nfdLength = nfdString.length();
4552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(nfdLength >= 2) {
4562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            char c = nfdString.charAt(0);
4572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(Hangul.isJamoL(c) || Hangul.isJamoV(c)) {
4582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // While handling a Hangul syllable, contractions starting with Jamo L or V
4592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // would not see the following Jamo of that syllable.
4602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new UnsupportedOperationException(
4612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        "contractions starting with conjoining Jamo L or V not supported");
4622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            c = nfdString.charAt(nfdLength - 1);
4642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(Hangul.isJamoL(c) ||
4652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    (Hangul.isJamoV(c) && Hangul.isJamoL(nfdString.charAt(nfdLength - 2)))) {
4662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // A contraction ending with Jamo L or L+V would require
4672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // generating Hangul syllables in addTailComposites() (588 for a Jamo L),
4682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // or decomposing a following Hangul syllable on the fly, during contraction matching.
4692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new UnsupportedOperationException(
4702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        "contractions ending with conjoining Jamo L or L+V not supported");
4712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // A Hangul syllable completely inside a contraction is ok.
4732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
4742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Note: If there is a prefix, then the parser checked that
4752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // both the prefix and the string beging with NFC boundaries (not Jamo V or T).
4762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Therefore: prefix.isEmpty() || !isJamoVOrT(nfdString.charAt(0))
4772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // (While handling a Hangul syllable, prefixes on Jamo V or T
4782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // would not see the previous Jamo of that syllable.)
4792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
4802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(strength != Collator.IDENTICAL) {
4812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Find the node index after which we insert the new tailored node.
4822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int index = findOrInsertNodeForCEs(strength);
4832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            assert(cesLength > 0);
4842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            long ce = ces[cesLength - 1];
4852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(strength == Collator.PRIMARY && !isTempCE(ce) && (ce >>> 32) == 0) {
4862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // There is no primary gap between ignorables and the space-first-primary.
4872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new UnsupportedOperationException(
4882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        "tailoring primary after ignorables not supported");
4892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(strength == Collator.QUATERNARY && ce == 0) {
4912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // The CE data structure does not support non-zero quaternary weights
4922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // on tertiary ignorables.
4932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new UnsupportedOperationException(
4942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        "tailoring quaternary after tertiary ignorables not supported");
4952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
4962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Insert the new tailored node.
4972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            index = insertTailoredNodeAfter(index, strength);
4982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Strength of the temporary CE:
4992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // The new relation may yield a stronger CE but not a weaker one.
5002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int tempStrength = ceStrength(ce);
5012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(strength < tempStrength) { tempStrength = strength; }
5022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ces[cesLength - 1] = tempCEFromIndexAndStrength(index, tempStrength);
5032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        setCaseBits(nfdString);
5062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int cesLengthBeforeExtension = cesLength;
5082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(extension.length() != 0) {
5092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            String nfdExtension = nfd.normalize(extension);
5102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            cesLength = dataBuilder.getCEs(nfdExtension, ces, cesLength);
5112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(cesLength > Collation.MAX_EXPANSION_LENGTH) {
5122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                throw new IllegalArgumentException(
5132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        "extension string adds too many collation elements (more than 31 total)");
5142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int ce32 = Collation.UNASSIGNED_CE32;
5172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if((!nfdPrefix.contentEquals(prefix) || !nfdString.contentEquals(str)) &&
5182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                !ignorePrefix(prefix) && !ignoreString(str)) {
5192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Map from the original input to the CEs.
5202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // We do this in case the canonical closure is incomplete,
5212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // so that it is possible to explicitly provide the missing mappings.
5222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ce32 = addIfDifferent(prefix, str, ces, cesLength, ce32);
5232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        addWithClosure(nfdPrefix, nfdString, ces, cesLength, ce32);
5252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        cesLength = cesLengthBeforeExtension;
5262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
5292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Picks one of the current CEs and finds or inserts a node in the graph
5302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * for the CE + strength.
5312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
5322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int findOrInsertNodeForCEs(int strength) {
5332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert(Collator.PRIMARY <= strength && strength <= Collator.QUATERNARY);
5342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Find the last CE that is at least as "strong" as the requested difference.
5362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Note: Stronger is smaller (Collator.PRIMARY=0).
5372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        long ce;
5382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(;; --cesLength) {
5392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(cesLength == 0) {
5402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ce = ces[0] = 0;
5412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                cesLength = 1;
5422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                break;
5432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
5442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ce = ces[cesLength - 1];
5452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(ceStrength(ce) <= strength) { break; }
5472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(isTempCE(ce)) {
5502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // No need to findCommonNode() here for lower levels
5512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // because insertTailoredNodeAfter() will do that anyway.
5522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return indexFromTempCE(ce);
5532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // root CE
5562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if((int)(ce >>> 56) == Collation.UNASSIGNED_IMPLICIT_BYTE) {
5572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            throw new UnsupportedOperationException(
5582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    "tailoring relative to an unassigned code point not supported");
5592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return findOrInsertNodeForRootCE(ce, strength);
5612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int findOrInsertNodeForRootCE(long ce, int strength) {
5642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert((int)(ce >>> 56) != Collation.UNASSIGNED_IMPLICIT_BYTE);
5652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Find or insert the node for each of the root CE's weights,
5672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // down to the requested level/strength.
5682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Root CEs must have common=zero quaternary weights (for which we never insert any nodes).
5692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert((ce & 0xc0) == 0);
5702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int index = findOrInsertNodeForPrimary(ce >>> 32);
5712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(strength >= Collator.SECONDARY) {
5722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int lower32 = (int)ce;
5732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            index = findOrInsertWeakNode(index, lower32 >>> 16, Collator.SECONDARY);
5742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(strength >= Collator.TERTIARY) {
5752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                index = findOrInsertWeakNode(index, lower32 & Collation.ONLY_TERTIARY_MASK,
5762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                            Collator.TERTIARY);
5772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
5782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
5792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return index;
5802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
5812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
5822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
5832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Like Java Collections.binarySearch(List, key, Comparator).
5842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
5852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the index>=0 where the item was found,
5862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *         or the index<0 for inserting the string at ~index in sorted order
5872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *         (index into rootPrimaryIndexes)
5882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
5892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int binarySearchForRootPrimaryNode(
5902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int[] rootPrimaryIndexes, int length, long[] nodes, long p) {
5912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(length == 0) { return ~0; }
5922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int start = 0;
5932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int limit = length;
5942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for (;;) {
595f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert            int i = (int)(((long)start + (long)limit) / 2);
5962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            long node = nodes[rootPrimaryIndexes[i]];
5972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            long nodePrimary = node >>> 32;  // weight32FromNode(node)
5982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if (p == nodePrimary) {
5992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return i;
6002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if (p < nodePrimary) {
6012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (i == start) {
6022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return ~start;  // insert s before i
6032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                limit = i;
6052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
6062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if (i == start) {
6072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    return ~(start + 1);  // insert s after i
6082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                start = i;
6102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
6132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /** Finds or inserts the node for a root CE's primary weight. */
6152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int findOrInsertNodeForPrimary(long p) {
6162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int rootIndex = binarySearchForRootPrimaryNode(
6172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            rootPrimaryIndexes.getBuffer(), rootPrimaryIndexes.size(), nodes.getBuffer(), p);
6182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(rootIndex >= 0) {
6192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return rootPrimaryIndexes.elementAti(rootIndex);
6202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
6212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Start a new list of nodes with this primary.
6222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int index = nodes.size();
6232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            nodes.addElement(nodeFromWeight32(p));
6242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            rootPrimaryIndexes.insertElementAt(index, ~rootIndex);
6252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return index;
6262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
6282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /** Finds or inserts the node for a secondary or tertiary weight. */
6302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int findOrInsertWeakNode(int index, int weight16, int level) {
6312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert(0 <= index && index < nodes.size());
6322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert(Collator.SECONDARY <= level && level <= Collator.TERTIARY);
6332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(weight16 == Collation.COMMON_WEIGHT16) {
6352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return findCommonNode(index, level);
6362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // If this will be the first below-common weight for the parent node,
6392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // then we will also need to insert a common weight after it.
6402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        long node = nodes.elementAti(index);
6412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert(strengthFromNode(node) < level);  // parent node is stronger
6422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(weight16 != 0 && weight16 < Collation.COMMON_WEIGHT16) {
6432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int hasThisLevelBefore = level == Collator.SECONDARY ? HAS_BEFORE2 : HAS_BEFORE3;
6442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if((node & hasThisLevelBefore) == 0) {
6452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // The parent node has an implied level-common weight.
6462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                long commonNode =
6472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    nodeFromWeight16(Collation.COMMON_WEIGHT16) | nodeFromStrength(level);
6482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(level == Collator.SECONDARY) {
6492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Move the HAS_BEFORE3 flag from the parent node
6502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // to the new secondary common node.
6512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    commonNode |= node & HAS_BEFORE3;
6522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    node &= ~(long)HAS_BEFORE3;
6532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                nodes.setElementAt(node | hasThisLevelBefore, index);
6552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Insert below-common-weight node.
6562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int nextIndex = nextIndexFromNode(node);
6572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                node = nodeFromWeight16(weight16) | nodeFromStrength(level);
6582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                index = insertNodeBetween(index, nextIndex, node);
6592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Insert common-weight node.
6602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                insertNodeBetween(index, nextIndex, commonNode);
6612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Return index of below-common-weight node.
6622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return index;
6632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Find the root CE's weight for this level.
6672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Postpone insertion if not found:
6682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Insert the new root node before the next stronger node,
6692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // or before the next root node with the same strength and a larger weight.
6702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int nextIndex;
6712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while((nextIndex = nextIndexFromNode(node)) != 0) {
6722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            node = nodes.elementAti(nextIndex);
6732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int nextStrength = strengthFromNode(node);
6742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(nextStrength <= level) {
6752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Insert before a stronger node.
6762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(nextStrength < level) { break; }
6772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // nextStrength == level
6782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(!isTailoredNode(node)) {
6792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int nextWeight16 = weight16FromNode(node);
6802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(nextWeight16 == weight16) {
6812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // Found the node for the root CE up to this level.
6822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        return nextIndex;
6832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
6842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    // Insert before a node with a larger same-strength weight.
6852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(nextWeight16 > weight16) { break; }
6862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
6872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
6882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Skip the next node.
6892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            index = nextIndex;
6902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
6912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        node = nodeFromWeight16(weight16) | nodeFromStrength(level);
6922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return insertNodeBetween(index, nextIndex, node);
6932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
6942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
6952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
6962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Makes and inserts a new tailored node into the list, after the one at index.
6972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Skips over nodes of weaker strength to maintain collation order
6982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * ("postpone insertion").
6992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the new node's index
7002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
7012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int insertTailoredNodeAfter(int index, int strength) {
7022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert(0 <= index && index < nodes.size());
7032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(strength >= Collator.SECONDARY) {
7042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            index = findCommonNode(index, Collator.SECONDARY);
7052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(strength >= Collator.TERTIARY) {
7062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                index = findCommonNode(index, Collator.TERTIARY);
7072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
7082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Postpone insertion:
7102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Insert the new node before the next one with a strength at least as strong.
7112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        long node = nodes.elementAti(index);
7122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int nextIndex;
7132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while((nextIndex = nextIndexFromNode(node)) != 0) {
7142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            node = nodes.elementAti(nextIndex);
7152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(strengthFromNode(node) <= strength) { break; }
7162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Skip the next node which has a weaker (larger) strength than the new one.
7172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            index = nextIndex;
7182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        node = IS_TAILORED | nodeFromStrength(strength);
7202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return insertNodeBetween(index, nextIndex, node);
7212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
7242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Inserts a new node into the list, between list-adjacent items.
7252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The node's previous and next indexes must not be set yet.
7262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * @return the new node's index
7272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
7282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int insertNodeBetween(int index, int nextIndex, long node) {
7292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert(previousIndexFromNode(node) == 0);
7302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert(nextIndexFromNode(node) == 0);
7312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert(nextIndexFromNode(nodes.elementAti(index)) == nextIndex);
7322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Append the new node and link it to the existing nodes.
7332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int newIndex = nodes.size();
7342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        node |= nodeFromPreviousIndex(index) | nodeFromNextIndex(nextIndex);
7352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        nodes.addElement(node);
7362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // nodes[index].nextIndex = newIndex
7372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        node = nodes.elementAti(index);
7382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        nodes.setElementAt(changeNodeNextIndex(node, newIndex), index);
7392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // nodes[nextIndex].previousIndex = newIndex
7402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(nextIndex != 0) {
7412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            node = nodes.elementAti(nextIndex);
7422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            nodes.setElementAt(changeNodePreviousIndex(node, newIndex), nextIndex);
7432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return newIndex;
7452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
7482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Finds the node which implies or contains a common=05 weight of the given strength
7492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * (secondary or tertiary), if the current node is stronger.
7502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Skips weaker nodes and tailored nodes if the current node is stronger
7512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * and is followed by an explicit-common-weight node.
7522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Always returns the input index if that node is no stronger than the given strength.
7532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
7542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int findCommonNode(int index, int strength) {
7552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert(Collator.SECONDARY <= strength && strength <= Collator.TERTIARY);
7562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        long node = nodes.elementAti(index);
7572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(strengthFromNode(node) >= strength) {
7582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // The current node is no stronger.
7592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return index;
7602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(strength == Collator.SECONDARY ? !nodeHasBefore2(node) : !nodeHasBefore3(node)) {
7622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // The current node implies the strength-common weight.
7632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return index;
7642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        index = nextIndexFromNode(node);
7662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        node = nodes.elementAti(index);
7672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert(!isTailoredNode(node) && strengthFromNode(node) == strength &&
7682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                weight16FromNode(node) < Collation.COMMON_WEIGHT16);
7692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Skip to the explicit common node.
7702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        do {
7712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            index = nextIndexFromNode(node);
7722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            node = nodes.elementAti(index);
7732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            assert(strengthFromNode(node) >= strength);
7742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } while(isTailoredNode(node) || strengthFromNode(node) > strength ||
7752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                weight16FromNode(node) < Collation.COMMON_WEIGHT16);
7762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert(weight16FromNode(node) == Collation.COMMON_WEIGHT16);
7772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return index;
7782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
7792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void setCaseBits(CharSequence nfdString) {
7812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int numTailoredPrimaries = 0;
7822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(int i = 0; i < cesLength; ++i) {
7832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(ceStrength(ces[i]) == Collator.PRIMARY) { ++numTailoredPrimaries; }
7842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
7852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // We should not be able to get too many case bits because
7862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // cesLength<=31==MAX_EXPANSION_LENGTH.
7872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // 31 pairs of case bits fit into an long without setting its sign bit.
7882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert(numTailoredPrimaries <= 31);
7892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        long cases = 0;
7912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(numTailoredPrimaries > 0) {
7922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            CharSequence s = nfdString;
7932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            UTF16CollationIterator baseCEs = new UTF16CollationIterator(baseData, false, s, 0);
7942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int baseCEsLength = baseCEs.fetchCEs() - 1;
7952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            assert(baseCEsLength >= 0 && baseCEs.getCE(baseCEsLength) == Collation.NO_CE);
7962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
7972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int lastCase = 0;
7982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int numBasePrimaries = 0;
7992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for(int i = 0; i < baseCEsLength; ++i) {
8002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                long ce = baseCEs.getCE(i);
8012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if((ce >>> 32) != 0) {
8022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ++numBasePrimaries;
8032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    int c = ((int)ce >> 14) & 3;
8042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    assert(c == 0 || c == 2);  // lowercase or uppercase, no mixed case in any base CE
8052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(numBasePrimaries < numTailoredPrimaries) {
8062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        cases |= (long)c << ((numBasePrimaries - 1) * 2);
8072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else if(numBasePrimaries == numTailoredPrimaries) {
8082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        lastCase = c;
8092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else if(c != lastCase) {
8102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // There are more base primary CEs than tailored primaries.
8112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // Set mixed case if the case bits of the remainder differ.
8122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        lastCase = 1;
8132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // Nothing more can change.
8142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        break;
8152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
8162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
8172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(numBasePrimaries >= numTailoredPrimaries) {
8192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                cases |= (long)lastCase << ((numTailoredPrimaries - 1) * 2);
8202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(int i = 0; i < cesLength; ++i) {
8242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            long ce = ces[i] & 0xffffffffffff3fffL;  // clear old case bits
8252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int strength = ceStrength(ce);
8262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(strength == Collator.PRIMARY) {
8272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ce |= (cases & 3) << 14;
8282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                cases >>>= 2;
8292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(strength == Collator.TERTIARY) {
8302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Tertiary CEs must have uppercase bits.
8312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // See the LDML spec, and comments in class CollationCompare.
8322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ce |= 0x8000;
8332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Tertiary ignorable CEs must have 0 case bits.
8352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // We set 0 case bits for secondary CEs too
8362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // since currently only U+0345 is cased and maps to a secondary CE,
8372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // and it is lowercase. Other secondaries are uncased.
8382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // See [[:Cased:]&[:uca1=:]] where uca1 queries the root primary weight.
8392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ces[i] = ce;
8402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /** Implements CollationRuleParser.Sink. */
8442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    @Override
8452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    void suppressContractions(UnicodeSet set) {
8462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        dataBuilder.suppressContractions(set);
8472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /** Implements CollationRuleParser.Sink. */
8502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    @Override
8512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    void optimize(UnicodeSet set) {
8522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        optimizeSet.addAll(set);
8532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
8562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Adds the mapping and its canonical closure.
8572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Takes ce32=dataBuilder.encodeCEs(...) so that the data builder
8582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * need not re-encode the CEs multiple times.
8592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
8602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int addWithClosure(CharSequence nfdPrefix, CharSequence nfdString,
8612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                long[] newCEs, int newCEsLength, int ce32) {
8622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Map from the NFD input to the CEs.
8632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ce32 = addIfDifferent(nfdPrefix, nfdString, newCEs, newCEsLength, ce32);
8642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        ce32 = addOnlyClosure(nfdPrefix, nfdString, newCEs, newCEsLength, ce32);
8652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        addTailComposites(nfdPrefix, nfdString);
8662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return ce32;
8672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
8682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
8692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int addOnlyClosure(CharSequence nfdPrefix, CharSequence nfdString,
8702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                long[] newCEs, int newCEsLength, int ce32) {
8712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Map from canonically equivalent input to the CEs. (But not from the all-NFD input.)
8722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // TODO: make CanonicalIterator work with CharSequence, or maybe change arguments here to String
8732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(nfdPrefix.length() == 0) {
8742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            CanonicalIterator stringIter = new CanonicalIterator(nfdString.toString());
8752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            String prefix = "";
8762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for(;;) {
8772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                String str = stringIter.next();
8782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(str == null) { break; }
8792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(ignoreString(str) || str.contentEquals(nfdString)) { continue; }
8802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ce32 = addIfDifferent(prefix, str, newCEs, newCEsLength, ce32);
8812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else {
8832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            CanonicalIterator prefixIter = new CanonicalIterator(nfdPrefix.toString());
8842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            CanonicalIterator stringIter = new CanonicalIterator(nfdString.toString());
8852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            for(;;) {
8862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                String prefix = prefixIter.next();
8872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(prefix == null) { break; }
8882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(ignorePrefix(prefix)) { continue; }
8892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                boolean samePrefix = prefix.contentEquals(nfdPrefix);
8902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                for(;;) {
8912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    String str = stringIter.next();
8922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(str == null) { break; }
8932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(ignoreString(str) || (samePrefix && str.contentEquals(nfdString))) { continue; }
8942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ce32 = addIfDifferent(prefix, str, newCEs, newCEsLength, ce32);
8952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
8962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                stringIter.reset();
8972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
8982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
8992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return ce32;
9002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void addTailComposites(CharSequence nfdPrefix, CharSequence nfdString) {
9032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Look for the last starter in the NFD string.
9042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int lastStarter;
9052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int indexAfterLastStarter = nfdString.length();
9062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(;;) {
9072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(indexAfterLastStarter == 0) { return; }  // no starter at all
9082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            lastStarter = Character.codePointBefore(nfdString, indexAfterLastStarter);
9092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(nfd.getCombiningClass(lastStarter) == 0) { break; }
9102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            indexAfterLastStarter -= Character.charCount(lastStarter);
9112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // No closure to Hangul syllables since we decompose them on the fly.
9132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(Hangul.isJamoL(lastStarter)) { return; }
9142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Are there any composites whose decomposition starts with the lastStarter?
9162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Note: Normalizer2Impl does not currently return start sets for NFC_QC=Maybe characters.
9172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // We might find some more equivalent mappings here if it did.
9182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        UnicodeSet composites = new UnicodeSet();
9192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(!nfcImpl.getCanonStartSet(lastStarter, composites)) { return; }
9202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        StringBuilder newNFDString = new StringBuilder(), newString = new StringBuilder();
9222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        long[] newCEs = new long[Collation.MAX_EXPANSION_LENGTH];
9232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        UnicodeSetIterator iter = new UnicodeSetIterator(composites);
9242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(iter.next()) {
9252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            assert(iter.codepoint != UnicodeSetIterator.IS_STRING);
9262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int composite = iter.codepoint;
9272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            String decomp = nfd.getDecomposition(composite);
9282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(!mergeCompositeIntoString(nfdString, indexAfterLastStarter, composite, decomp,
9292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    newNFDString, newString)) {
9302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                continue;
9312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
9322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int newCEsLength = dataBuilder.getCEs(nfdPrefix, newNFDString, newCEs, 0);
9332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(newCEsLength > Collation.MAX_EXPANSION_LENGTH) {
9342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Ignore mappings that we cannot store.
9352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                continue;
9362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
9372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Note: It is possible that the newCEs do not make use of the mapping
9382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // for which we are adding the tail composites, in which case we might be adding
9392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // unnecessary mappings.
9402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // For example, when we add tail composites for ae^ (^=combining circumflex),
9412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // UCA discontiguous-contraction matching does not find any matches
9422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // for ae_^ (_=any combining diacritic below) *unless* there is also
9432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // a contraction mapping for ae.
9442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Thus, if there is no ae contraction, then the ae^ mapping is ignored
9452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // while fetching the newCEs for ae_^.
9462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // TODO: Try to detect this effectively.
9472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // (Alternatively, print a warning when prefix contractions are missing.)
9482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // We do not need an explicit mapping for the NFD strings.
9502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // It is fine if the NFD input collates like this via a sequence of mappings.
9512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // It also saves a little bit of space, and may reduce the set of characters with contractions.
9522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int ce32 = addIfDifferent(nfdPrefix, newString,
9532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                          newCEs, newCEsLength, Collation.UNASSIGNED_CE32);
9542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(ce32 != Collation.UNASSIGNED_CE32) {
9552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // was different, was added
9562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                addOnlyClosure(nfdPrefix, newNFDString, newCEs, newCEsLength, ce32);
9572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
9582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
9602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private boolean mergeCompositeIntoString(CharSequence nfdString, int indexAfterLastStarter,
9622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int composite, CharSequence decomp,
9632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                StringBuilder newNFDString, StringBuilder newString) {
9642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert(Character.codePointBefore(nfdString, indexAfterLastStarter) ==
9652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                Character.codePointAt(decomp, 0));
9662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int lastStarterLength = Character.offsetByCodePoints(decomp, 0, 1);
9672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(lastStarterLength == decomp.length()) {
9682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Singleton decompositions should be found by addWithClosure()
9692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // and the CanonicalIterator, so we can ignore them here.
9702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return false;
9712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(equalSubSequences(nfdString, indexAfterLastStarter, decomp, lastStarterLength)) {
9732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // same strings, nothing new to be found here
9742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return false;
9752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
9762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Make new FCD strings that combine a composite, or its decomposition,
9782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // into the nfdString's last starter and the combining marks following it.
9792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Make an NFD version, and a version with the composite.
9802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        newNFDString.setLength(0);
9812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        newNFDString.append(nfdString, 0, indexAfterLastStarter);
9822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        newString.setLength(0);
9832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        newString.append(nfdString, 0, indexAfterLastStarter - lastStarterLength)
9842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            .appendCodePoint(composite);
9852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
9862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // The following is related to discontiguous contraction matching,
9872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // but builds only FCD strings (or else returns false).
9882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int sourceIndex = indexAfterLastStarter;
9892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int decompIndex = lastStarterLength;
9902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Small optimization: We keep the source character across loop iterations
9912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // because we do not always consume it,
9922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // and then need not fetch it again nor look up its combining class again.
9932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int sourceChar = Collation.SENTINEL_CP;
9942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // The cc variables need to be declared before the loop so that at the end
9952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // they are set to the last combining classes seen.
9962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int sourceCC = 0;
9972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int decompCC = 0;
9982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(;;) {
9992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(sourceChar < 0) {
10002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(sourceIndex >= nfdString.length()) { break; }
10012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                sourceChar = Character.codePointAt(nfdString, sourceIndex);
10022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                sourceCC = nfd.getCombiningClass(sourceChar);
10032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                assert(sourceCC != 0);
10042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
10052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // We consume a decomposition character in each iteration.
10062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(decompIndex >= decomp.length()) { break; }
10072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int decompChar = Character.codePointAt(decomp, decompIndex);
10082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            decompCC = nfd.getCombiningClass(decompChar);
10092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // Compare the two characters and their combining classes.
10102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(decompCC == 0) {
10112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Unable to merge because the source contains a non-zero combining mark
10122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // but the composite's decomposition contains another starter.
10132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // The strings would not be equivalent.
10142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
10152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(sourceCC < decompCC) {
10162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Composite + sourceChar would not be FCD.
10172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
10182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(decompCC < sourceCC) {
10192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                newNFDString.appendCodePoint(decompChar);
10202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                decompIndex += Character.charCount(decompChar);
10212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else if(decompChar != sourceChar) {
10222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Blocked because same combining class.
10232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
10242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {  // match: decompChar == sourceChar
10252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                newNFDString.appendCodePoint(decompChar);
10262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                decompIndex += Character.charCount(decompChar);
10272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                sourceIndex += Character.charCount(decompChar);
10282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                sourceChar = Collation.SENTINEL_CP;
10292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
10302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // We are at the end of at least one of the two inputs.
10322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(sourceChar >= 0) {  // more characters from nfdString but not from decomp
10332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(sourceCC < decompCC) {
10342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Appending the next source character to the composite would not be FCD.
10352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
10362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
10372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            newNFDString.append(nfdString, sourceIndex, nfdString.length());
10382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            newString.append(nfdString, sourceIndex, nfdString.length());
10392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        } else if(decompIndex < decomp.length()) {  // more characters from decomp, not from nfdString
10402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            newNFDString.append(decomp, decompIndex, decomp.length());
10412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert(nfd.isNormalized(newNFDString));
10432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert(fcd.isNormalized(newString));
10442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert(nfd.normalize(newString).equals(newNFDString.toString()));  // canonically equivalent
10452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
10462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
10472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private boolean equalSubSequences(CharSequence left, int leftStart, CharSequence right, int rightStart) {
10492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // C++ UnicodeString::compare(leftStart, 0x7fffffff, right, rightStart, 0x7fffffff) == 0
10502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int leftLength = left.length();
10512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if((leftLength - leftStart) != (right.length() - rightStart)) { return false; }
10522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(leftStart < leftLength) {
10532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(left.charAt(leftStart++) != right.charAt(rightStart++)) {
10542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return false;
10552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
10562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
10582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
10592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private boolean ignorePrefix(CharSequence s) {
10602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Do not map non-FCD prefixes.
10612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return !isFCD(s);
10622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
10632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private boolean ignoreString(CharSequence s) {
10642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Do not map non-FCD strings.
10652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Do not map strings that start with Hangul syllables: We decompose those on the fly.
10662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return !isFCD(s) || Hangul.isHangul(s.charAt(0));
10672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
10682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private boolean isFCD(CharSequence s) {
10692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return fcd.isNormalized(s);
10702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
10712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final UnicodeSet COMPOSITES = new UnicodeSet("[:NFD_QC=N:]");
10732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    static {
10742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        // Hangul is decomposed on the fly during collation.
10752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        COMPOSITES.remove(Hangul.HANGUL_BASE, Hangul.HANGUL_END);
10762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
10772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void closeOverComposites() {
10792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        String prefix = "";  // empty
10802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        UnicodeSetIterator iter = new UnicodeSetIterator(COMPOSITES);
10812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        while(iter.next()) {
10822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            assert(iter.codepoint != UnicodeSetIterator.IS_STRING);
10832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            String nfdString = nfd.getDecomposition(iter.codepoint);
10842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            cesLength = dataBuilder.getCEs(nfdString, ces, 0);
10852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(cesLength > Collation.MAX_EXPANSION_LENGTH) {
10862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // Too many CEs from the decomposition (unusual), ignore this composite.
10872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // We could add a capacity parameter to getCEs() and reallocate if necessary.
10882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // However, this can only really happen in contrived cases.
10892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                continue;
10902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
10912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            String composite = iter.getString();
10922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            addIfDifferent(prefix, composite, ces, cesLength, Collation.UNASSIGNED_CE32);
10932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
10942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
10952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
10962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int addIfDifferent(CharSequence prefix, CharSequence str,
10972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                long[] newCEs, int newCEsLength, int ce32) {
10982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        long[] oldCEs = new long[Collation.MAX_EXPANSION_LENGTH];
10992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int oldCEsLength = dataBuilder.getCEs(prefix, str, oldCEs, 0);
11002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(!sameCEs(newCEs, newCEsLength, oldCEs, oldCEsLength)) {
11012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(ce32 == Collation.UNASSIGNED_CE32) {
11022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                ce32 = dataBuilder.encodeCEs(newCEs, newCEsLength);
11032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
11042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            dataBuilder.addCE32(prefix, str, ce32);
11052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
11062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return ce32;
11072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
11082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
11092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static boolean sameCEs(long[] ces1, int ces1Length,
11102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                long[] ces2, int ces2Length) {
11112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(ces1Length != ces2Length) {
11122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            return false;
11132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
11142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        assert(ces1Length <= Collation.MAX_EXPANSION_LENGTH);
11152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(int i = 0; i < ces1Length; ++i) {
11162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(ces1[i] != ces2[i]) { return false; }
11172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
11182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return true;
11192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
11202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
11212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int alignWeightRight(int w) {
11222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(w != 0) {
11232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while((w & 0xff) == 0) { w >>>= 8; }
11242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
11252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return w;
11262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
11272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
11282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
11292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Walks the tailoring graph and overwrites tailored nodes with new CEs.
11302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * After this, the graph is destroyed.
11312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The nodes array can then be used only as a source of tailored CEs.
11322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
11332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void makeTailoredCEs() {
11342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        CollationWeights primaries = new CollationWeights();
11352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        CollationWeights secondaries = new CollationWeights();
11362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        CollationWeights tertiaries = new CollationWeights();
11372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        long[] nodesArray = nodes.getBuffer();
11382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        if(DEBUG) {
11392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            System.out.println("\nCollationBuilder.makeTailoredCEs()");
11402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
11412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
11422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(int rpi = 0; rpi < rootPrimaryIndexes.size(); ++rpi) {
11432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int i = rootPrimaryIndexes.elementAti(rpi);
11442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            long node = nodesArray[i];
11452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            long p = weight32FromNode(node);
11462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int s = p == 0 ? 0 : Collation.COMMON_WEIGHT16;
11472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int t = s;
11482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int q = 0;
11492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            boolean pIsTailored = false;
11502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            boolean sIsTailored = false;
11512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            boolean tIsTailored = false;
11522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(DEBUG) {
11532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                System.out.printf("\nprimary     %x\n", alignWeightRight((int)p));
11542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
11552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int pIndex = p == 0 ? 0 : rootElements.findPrimary(p);
11562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            int nextIndex = nextIndexFromNode(node);
11572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            while(nextIndex != 0) {
11582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                i = nextIndex;
11592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                node = nodesArray[i];
11602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                nextIndex = nextIndexFromNode(node);
11612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                int strength = strengthFromNode(node);
11622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(strength == Collator.QUATERNARY) {
11632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    assert(isTailoredNode(node));
11642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(DEBUG) {
11652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        System.out.print("      quat+     ");
11662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
11672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(q == 3) {
11682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        // C++ U_BUFFER_OVERFLOW_ERROR
11692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        throw new UnsupportedOperationException("quaternary tailoring gap too small");
11702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
11712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ++q;
11722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
11732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(strength == Collator.TERTIARY) {
11742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if(isTailoredNode(node)) {
11752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            if(DEBUG) {
11762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                System.out.print("    ter+        ");
11772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            }
11782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            if(!tIsTailored) {
11792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // First tailored tertiary node for [p, s].
11802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                int tCount = countTailoredNodes(nodesArray, nextIndex,
11812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                                    Collator.TERTIARY) + 1;
11822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                int tLimit;
11832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                if(t == 0) {
11842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    // Gap at the beginning of the tertiary CE range.
11852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    t = rootElements.getTertiaryBoundary() - 0x100;
11862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    tLimit = (int)rootElements.getFirstTertiaryCE() & Collation.ONLY_TERTIARY_MASK;
11872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                } else if(!pIsTailored && !sIsTailored) {
11882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    // p and s are root weights.
11892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    tLimit = rootElements.getTertiaryAfter(pIndex, s, t);
11902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                } else if(t == Collation.BEFORE_WEIGHT16) {
11912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    tLimit = Collation.COMMON_WEIGHT16;
11922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                } else {
11932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    // [p, s] is tailored.
11942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    assert(t == Collation.COMMON_WEIGHT16);
11952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    tLimit = rootElements.getTertiaryBoundary();
11962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                }
11972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                assert(tLimit == 0x4000 || (tLimit & ~Collation.ONLY_TERTIARY_MASK) == 0);
11982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                tertiaries.initForTertiary();
11992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                if(!tertiaries.allocWeights(t, tLimit, tCount)) {
12002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    // C++ U_BUFFER_OVERFLOW_ERROR
12012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    throw new UnsupportedOperationException("tertiary tailoring gap too small");
12022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                }
12032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                tIsTailored = true;
12042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            }
12052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            t = (int)tertiaries.nextWeight();
12062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            assert(t != 0xffffffff);
12072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        } else {
12082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            t = weight16FromNode(node);
12092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            tIsTailored = false;
12102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            if(DEBUG) {
12112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                System.out.printf("    ter     %x\n", alignWeightRight(t));
12122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            }
12132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
12142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    } else {
12152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        if(strength == Collator.SECONDARY) {
12162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            if(isTailoredNode(node)) {
12172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                if(DEBUG) {
12182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    System.out.print("  sec+          ");
12192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                }
12202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                if(!sIsTailored) {
12212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    // First tailored secondary node for p.
12222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    int sCount = countTailoredNodes(nodesArray, nextIndex,
12232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                                        Collator.SECONDARY) + 1;
12242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    int sLimit;
12252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    if(s == 0) {
12262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                        // Gap at the beginning of the secondary CE range.
12272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                        s = rootElements.getSecondaryBoundary() - 0x100;
12282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                        sLimit = (int)(rootElements.getFirstSecondaryCE() >> 16);
12292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    } else if(!pIsTailored) {
12302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                        // p is a root primary.
12312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                        sLimit = rootElements.getSecondaryAfter(pIndex, s);
12322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    } else if(s == Collation.BEFORE_WEIGHT16) {
12332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                        sLimit = Collation.COMMON_WEIGHT16;
12342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    } else {
12352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                        // p is a tailored primary.
12362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                        assert(s == Collation.COMMON_WEIGHT16);
12372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                        sLimit = rootElements.getSecondaryBoundary();
12382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    }
12392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    if(s == Collation.COMMON_WEIGHT16) {
12402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                        // Do not tailor into the getSortKey() range of
12412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                        // compressed common secondaries.
12422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                        s = rootElements.getLastCommonSecondary();
12432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    }
12442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    secondaries.initForSecondary();
12452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    if(!secondaries.allocWeights(s, sLimit, sCount)) {
12462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                        // C++ U_BUFFER_OVERFLOW_ERROR
12472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                        if(DEBUG) {
12482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                            System.out.printf("!secondaries.allocWeights(%x, %x, sCount=%d)\n",
12492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                    alignWeightRight(s), alignWeightRight(sLimit),
12502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                    alignWeightRight(sCount));
12512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                        }
12522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                        throw new UnsupportedOperationException("secondary tailoring gap too small");
12532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    }
12542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    sIsTailored = true;
12552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                }
12562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                s = (int)secondaries.nextWeight();
12572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                assert(s != 0xffffffff);
12582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            } else {
12592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                s = weight16FromNode(node);
12602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                sIsTailored = false;
12612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                if(DEBUG) {
12622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    System.out.printf("  sec       %x\n", alignWeightRight(s));
12632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                }
12642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            }
12652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        } else /* Collator.PRIMARY */ {
12662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            assert(isTailoredNode(node));
12672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            if(DEBUG) {
12682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                System.out.print("pri+            ");
12692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            }
12702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            if(!pIsTailored) {
12712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                // First tailored primary node in this list.
12722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                int pCount = countTailoredNodes(nodesArray, nextIndex,
12732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                                                    Collator.PRIMARY) + 1;
12742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                boolean isCompressible = baseData.isCompressiblePrimary(p);
12752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                long pLimit =
12762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    rootElements.getPrimaryAfter(p, pIndex, isCompressible);
12772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                primaries.initForPrimary(isCompressible);
12782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                if(!primaries.allocWeights(p, pLimit, pCount)) {
12792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    // C++ U_BUFFER_OVERFLOW_ERROR  // TODO: introduce a more specific UErrorCode?
12802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                    throw new UnsupportedOperationException("primary tailoring gap too small");
12812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                }
12822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                                pIsTailored = true;
12832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            }
12842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            p = primaries.nextWeight();
12852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            assert(p != 0xffffffffL);
12862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            s = Collation.COMMON_WEIGHT16;
12872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                            sIsTailored = false;
12882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        }
12892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        t = s == 0 ? 0 : Collation.COMMON_WEIGHT16;
12902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        tIsTailored = false;
12912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
12922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    q = 0;
12932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
12942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(isTailoredNode(node)) {
12952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    nodesArray[i] = Collation.makeCE(p, s, t, q);
12962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    if(DEBUG) {
12972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                        System.out.printf("%016x\n", nodesArray[i]);
12982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    }
12992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
13002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
13012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
13022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
13032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
13052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Counts the tailored nodes of the given strength up to the next node
13062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * which is either stronger or has an explicit weight of this strength.
13072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
13082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static int countTailoredNodes(long[] nodesArray, int i, int strength) {
13092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int count = 0;
13102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        for(;;) {
13112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(i == 0) { break; }
13122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            long node = nodesArray[i];
13132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(strengthFromNode(node) < strength) { break; }
13142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(strengthFromNode(node) == strength) {
13152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                if(isTailoredNode(node)) {
13162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    ++count;
13172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                } else {
13182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                    break;
13192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                }
13202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
13212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            i = nextIndexFromNode(node);
13222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
13232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return count;
13242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
13252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final class CEFinalizer implements CollationDataBuilder.CEModifier {
13272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        CEFinalizer(long[] ces) {
13282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            finalCEs = ces;
13292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1330f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        @Override
13312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public long modifyCE32(int ce32) {
13322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            assert(!Collation.isSpecialCE32(ce32));
13332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(CollationBuilder.isTempCE32(ce32)) {
13342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // retain case bits
13352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return finalCEs[CollationBuilder.indexFromTempCE32(ce32)] | ((ce32 & 0xc0) << 8);
13362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
13372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return Collation.NO_CE;
13382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
13392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
1340f86f25d102340da66b9c7cb6b2d5ecdc0de43ecfFredrik Roubert        @Override
13412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        public long modifyCE(long ce) {
13422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            if(CollationBuilder.isTempCE(ce)) {
13432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                // retain case bits
13442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return finalCEs[CollationBuilder.indexFromTempCE(ce)] | (ce & 0xc000);
13452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            } else {
13462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller                return Collation.NO_CE;
13472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            }
13482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        }
13492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        private long[] finalCEs;
13512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    };
13522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /** Replaces temporary CEs with the final CEs they point to. */
13542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private void finalizeCEs() {
13552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        CollationDataBuilder newBuilder = new CollationDataBuilder();
13562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        newBuilder.initForTailoring(baseData);
13572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        CEFinalizer finalizer = new CEFinalizer(nodes.getBuffer());
13582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        newBuilder.copyFrom(dataBuilder, finalizer);
13592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        dataBuilder = newBuilder;
13602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
13612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
13622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
13632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Encodes "temporary CE" data into a CE that fits into the CE32 data structure,
13642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * with 2-byte primary, 1-byte secondary and 6-bit tertiary,
13652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * with valid CE byte values.
13662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
13672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The index must not exceed 20 bits (0xfffff).
13682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The strength must fit into 2 bits (Collator.PRIMARY..Collator.QUATERNARY).
13692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
13702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Temporary CEs are distinguished from real CEs by their use of
13712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * secondary weights 06..45 which are otherwise reserved for compressed sort keys.
13722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
13732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The case bits are unused and available.
13742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
13752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static long tempCEFromIndexAndStrength(int index, int strength) {
13762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return
13772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // CE byte offsets, to ensure valid CE bytes, and case bits 11
13782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            0x4040000006002000L +
13792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // index bits 19..13 -> primary byte 1 = CE bits 63..56 (byte values 40..BF)
13802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ((long)(index & 0xfe000) << 43) +
13812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // index bits 12..6 -> primary byte 2 = CE bits 55..48 (byte values 40..BF)
13822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ((long)(index & 0x1fc0) << 42) +
13832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // index bits 5..0 -> secondary byte 1 = CE bits 31..24 (byte values 06..45)
13842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ((index & 0x3f) << 24) +
13852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            // strength bits 1..0 -> tertiary byte 1 = CE bits 13..8 (byte values 20..23)
13862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            (strength << 8);
13872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
13882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static int indexFromTempCE(long tempCE) {
13892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        tempCE -= 0x4040000006002000L;
13902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return
13912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ((int)(tempCE >> 43) & 0xfe000) |
13922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ((int)(tempCE >> 42) & 0x1fc0) |
13932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ((int)(tempCE >> 24) & 0x3f);
13942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
13952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static int strengthFromTempCE(long tempCE) {
13962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return ((int)tempCE >> 8) & 3;
13972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
13982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static boolean isTempCE(long ce) {
13992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        int sec = (int)ce >>> 24;
14002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return 6 <= sec && sec <= 0x45;
14012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static int indexFromTempCE32(int tempCE32) {
14042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        tempCE32 -= 0x40400620;
14052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return
14062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ((tempCE32 >> 11) & 0xfe000) |
14072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ((tempCE32 >> 10) & 0x1fc0) |
14082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ((tempCE32 >> 8) & 0x3f);
14092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static boolean isTempCE32(int ce32) {
14112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return
14122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            (ce32 & 0xff) >= 2 &&  // not a long-primary/long-secondary CE32
14132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            6 <= ((ce32 >> 8) & 0xff) && ((ce32 >> 8) & 0xff) <= 0x45;
14142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static int ceStrength(long ce) {
14172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return
14182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            isTempCE(ce) ? strengthFromTempCE(ce) :
14192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            (ce & 0xff00000000000000L) != 0 ? Collator.PRIMARY :
14202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ((int)ce & 0xff000000) != 0 ? Collator.SECONDARY :
14212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            ce != 0 ? Collator.TERTIARY :
14222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller            Collator.IDENTICAL;
14232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /** At most 1M nodes, limited by the 20 bits in node bit fields. */
14262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int MAX_INDEX = 0xfffff;
14272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
14282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Node bit 6 is set on a primary node if there are nodes
14292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * with secondary values below the common secondary weight (05).
14302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
14312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int HAS_BEFORE2 = 0x40;
14322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
14332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Node bit 5 is set on a primary or secondary node if there are nodes
14342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * with tertiary values below the common tertiary weight (05).
14352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
14362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int HAS_BEFORE3 = 0x20;
14372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
14382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Node bit 3 distinguishes a tailored node, which has no weight value,
14392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * from a node with an explicit (root or default) weight.
14402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
14412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static final int IS_TAILORED = 8;
14422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static long nodeFromWeight32(long weight32) {
14442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return weight32 << 32;
14452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static long nodeFromWeight16(int weight16) {
14472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (long)weight16 << 48;
14482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static long nodeFromPreviousIndex(int previous) {
14502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (long)previous << 28;
14512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static long nodeFromNextIndex(int next) {
14532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return next << 8;
14542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static long nodeFromStrength(int strength) {
14562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return strength;
14572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static long weight32FromNode(long node) {
14602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return node >>> 32;
14612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static int weight16FromNode(long node) {
14632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (int)(node >> 48) & 0xffff;
14642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static int previousIndexFromNode(long node) {
14662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (int)(node >> 28) & MAX_INDEX;
14672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static int nextIndexFromNode(long node) {
14692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return ((int)node >> 8) & MAX_INDEX;
14702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static int strengthFromNode(long node) {
14722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (int)node & 3;
14732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static boolean nodeHasBefore2(long node) {
14762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (node & HAS_BEFORE2) != 0;
14772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static boolean nodeHasBefore3(long node) {
14792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (node & HAS_BEFORE3) != 0;
14802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static boolean nodeHasAnyBefore(long node) {
14822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (node & (HAS_BEFORE2 | HAS_BEFORE3)) != 0;
14832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static boolean isTailoredNode(long node) {
14852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (node & IS_TAILORED) != 0;
14862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static long changeNodePreviousIndex(long node, int previous) {
14892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (node & 0xffff00000fffffffL) | nodeFromPreviousIndex(previous);
14902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private static long changeNodeNextIndex(long node, int next) {
14922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller        return (node & 0xfffffffff00000ffL) | nodeFromNextIndex(next);
14932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    }
14942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private Normalizer2 nfd, fcd;
14962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private Normalizer2Impl nfcImpl;
14972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
14982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private CollationTailoring base;
14992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private CollationData baseData;
15002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private CollationRootElements rootElements;
15012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private long variableTop;
15022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
15032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private CollationDataBuilder dataBuilder;
15042ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private boolean fastLatinEnabled;
15052ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private UnicodeSet optimizeSet = new UnicodeSet();
15062ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
15072ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private long[] ces = new long[Collation.MAX_EXPANSION_LENGTH];
15082ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private int cesLength;
15092ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller
15102ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
15112ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Indexes of nodes with root primary weights, sorted by primary.
15122ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Compact form of a TreeMap from root primary to node index.
15132ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
15142ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * This is a performance optimization for finding reset positions.
15152ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Without this, we would have to search through the entire nodes list.
15162ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * It also allows storing root primary weights in list head nodes,
15172ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * without previous index, leaving room in root primary nodes for 32-bit primary weights.
15182ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
15192ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private UVector32 rootPrimaryIndexes;
15202ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    /**
15212ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Data structure for assigning tailored weights and CEs.
15222ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Doubly-linked lists of nodes in mostly collation order.
15232ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Each list starts with a root primary node and ends with a nextIndex of 0.
15242ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
15252ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * When there are any nodes in the list, then there is always a root primary node at index 0.
15262ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * This allows some code not to have to check explicitly for nextIndex==0.
15272ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
15282ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Root primary nodes have 32-bit weights but do not have previous indexes.
15292ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * All other nodes have at most 16-bit weights and do have previous indexes.
15302ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
15312ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Nodes with explicit weights store root collator weights,
15322ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * or default weak weights (e.g., secondary 05) for stronger nodes.
15332ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * "Tailored" nodes, with the IS_TAILORED bit set,
15342ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * do not store explicit weights but rather
15352ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * create a difference of a certain strength from the preceding node.
15362ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
15372ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * A root node is followed by either
15382ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * - a root/default node of the same strength, or
15392ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * - a root/default node of the next-weaker strength, or
15402ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * - a tailored node of the same strength.
15412ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
15422ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * A node of a given strength normally implies "common" weights on weaker levels.
15432ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
15442ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * A node with HAS_BEFORE2 must be immediately followed by
15452ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * a secondary node with an explicit below-common weight, then a secondary tailored node,
15462ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * and later an explicit common-secondary node.
15472ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * The below-common weight can be a root weight,
15482ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * or it can be BEFORE_WEIGHT16 for tailoring before an implied common weight
15492ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * or before the lowest root weight.
15502ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * (&[before 2] resets to an explicit secondary node so that
15512ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * the following addRelation(secondary) tailors right after that.
15522ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If we did not have this node and instead were to reset on the primary node,
15532ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * then addRelation(secondary) would skip forward to the the COMMON_WEIGHT16 node.)
15542ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
15552ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * If the flag is not set, then there are no explicit secondary nodes
15562ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * with the common or lower weights.
15572ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
15582ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Same for HAS_BEFORE3 for tertiary nodes and weights.
15592ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * A node must not have both flags set.
15602ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
15612ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Tailored CEs are initially represented in a CollationDataBuilder as temporary CEs
15622ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * which point to stable indexes in this list,
15632ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * and temporary CEs stored in a CollationDataBuilder only point to tailored nodes.
15642ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
15652ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * A temporary CE in the ces[] array may point to a non-tailored reset-before-position node,
15662ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * until the next relation is added.
15672ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
15682ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * At the end, the tailored weights are allocated as necessary,
15692ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * then the tailored nodes are replaced with final CEs,
15702ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * and the CollationData is rewritten by replacing temporary CEs with final ones.
15712ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
15722ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * We cannot simply insert new nodes in the middle of the array
15732ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * because that would invalidate the indexes stored in existing temporary CEs.
15742ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * We need to use a linked graph with stable indexes to existing nodes.
15752ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * A doubly-linked list seems easiest to maintain.
15762ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
15772ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Each node is stored as an long, with its fields stored as bit fields.
15782ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
15792ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Root primary node:
15802ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * - primary weight: 32 bits 63..32
15812ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * - reserved/unused/zero: 4 bits 31..28
15822ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
15832ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * Weaker root nodes & tailored nodes:
15842ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * - a weight: 16 bits 63..48
15852ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   + a root or default weight for a non-tailored node
15862ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   + unused/zero for a tailored node
15872ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * - index to the previous node: 20 bits 47..28
15882ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
15892ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * All types of nodes:
15902ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * - index to the next node: 20 bits 27..8
15912ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *   + nextIndex=0 in last node per root-primary list
15922ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * - reserved/unused/zero bits: bits 7, 4, 2
15932ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * - HAS_BEFORE2: bit 6
15942ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * - HAS_BEFORE3: bit 5
15952ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * - IS_TAILORED: bit 3
15962ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * - the difference strength (primary/secondary/tertiary/quaternary): 2 bits 1..0
15972ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     *
15982ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * We could allocate structs with pointers, but we would have to store them
15992ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * in a pointer list so that they can be indexed from temporary CEs,
16002ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     * and they would require more memory allocations.
16012ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller     */
16022ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller    private UVector64 nodes;
16032ae130017183d2f66d55bf0ca51f8da3294644fdNeil Fuller}
1604