17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/*
27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*******************************************************************************
37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Copyright (C) 2013-2014, International Business Machines
47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Corporation and others.  All Rights Reserved.
57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*******************************************************************************
67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* CollationBuilder.java, ported from collationbuilder.h/.cpp
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* C++ version created on: 2013may06
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* created by: Markus W. Scherer
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*/
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.impl.coll;
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.text.ParseException;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Norm2AllModes;
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Normalizer2Impl;
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Normalizer2Impl.Hangul;
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UScript;
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.CanonicalIterator;
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.Collator;
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.Normalizer2;
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UnicodeSet;
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UnicodeSetIterator;
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.ULocale;
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic final class CollationBuilder extends CollationRuleParser.Sink {
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final boolean DEBUG = false;
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final class BundleImporter implements CollationRuleParser.Importer {
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        BundleImporter() {}
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String getRules(String localeID, String collationType) {
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return CollationLoader.loadRules(new ULocale(localeID), collationType);
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public CollationBuilder(CollationTailoring b) {
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        nfd = Normalizer2.getNFDInstance();
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fcd = Norm2AllModes.getFCDNormalizer2();
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        nfcImpl = Norm2AllModes.getNFCInstance().impl;
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        base = b;
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        baseData = b.data;
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        rootElements = new CollationRootElements(b.data.rootElements);
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        variableTop = 0;
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        dataBuilder = new CollationDataBuilder();
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        fastLatinEnabled = true;
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        cesLength = 0;
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        rootPrimaryIndexes = new UVector32();
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        nodes = new UVector64();
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        nfcImpl.ensureCanonIterData();
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        dataBuilder.initForTailoring(baseData);
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public CollationTailoring parseAndBuild(String ruleString) throws ParseException {
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(baseData.rootElements == null) {
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // C++ U_MISSING_RESOURCE_ERROR
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new UnsupportedOperationException(
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    "missing root elements data, tailoring not supported");
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CollationTailoring tailoring = new CollationTailoring(base.settings);
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CollationRuleParser parser = new CollationRuleParser(baseData);
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Note: This always bases &[last variable] and &[first regular]
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // on the root collator's maxVariable/variableTop.
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // If we wanted this to change after [maxVariable x], then we would keep
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // the tailoring.settings pointer here and read its variableTop when we need it.
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // See http://unicode.org/cldr/trac/ticket/6070
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        variableTop = base.settings.readOnly().variableTop;
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        parser.setSink(this);
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // In Java, there is only one Importer implementation.
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // In C++, the importer is a parameter for this method.
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        parser.setImporter(new BundleImporter());
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CollationSettings ownedSettings = tailoring.settings.copyOnWrite();
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        parser.parse(ruleString, ownedSettings);
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(dataBuilder.hasMappings()) {
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            makeTailoredCEs();
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            closeOverComposites();
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            finalizeCEs();
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Copy all of ASCII, and Latin-1 letters, into each tailoring.
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            optimizeSet.add(0, 0x7f);
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            optimizeSet.add(0xc0, 0xff);
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Hangul is decomposed on the fly during collation,
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // and the tailoring data is always built with HANGUL_TAG specials.
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            optimizeSet.remove(Hangul.HANGUL_BASE, Hangul.HANGUL_END);
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            dataBuilder.optimize(optimizeSet);
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            tailoring.ensureOwnedData();
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(fastLatinEnabled) { dataBuilder.enableFastLatin(); }
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            dataBuilder.build(tailoring.ownedData);
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // C++ tailoring.builder = dataBuilder;
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            dataBuilder = null;
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            tailoring.data = baseData;
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ownedSettings.fastLatinOptions = CollationFastLatin.getOptions(
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                tailoring.data, ownedSettings,
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ownedSettings.fastLatinPrimaries);
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        tailoring.rules = ruleString;
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // In Java, we do not have a rules version.
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // In C++, the genrb build tool reads and supplies one,
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // and the rulesVersion is a parameter for this method.
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        tailoring.setVersion(base.version, 0 /* rulesVersion */);
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return tailoring;
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Implements CollationRuleParser.Sink. */
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    void addReset(int strength, CharSequence str) {
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(str.length() != 0);
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(str.charAt(0) == CollationRuleParser.POS_LEAD) {
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ces[0] = getSpecialResetPosition(str);
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            cesLength = 1;
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assert((ces[0] & Collation.CASE_AND_QUATERNARY_MASK) == 0);
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // normal reset to a character or string
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String nfdString = nfd.normalize(str);
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            cesLength = dataBuilder.getCEs(nfdString, ces, 0);
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(cesLength > Collation.MAX_EXPANSION_LENGTH) {
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new IllegalArgumentException(
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        "reset position maps to too many collation elements (more than 31)");
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(strength == Collator.IDENTICAL) { return; }  // simple reset-at-position
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // &[before strength]position
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(Collator.PRIMARY <= strength && strength <= Collator.TERTIARY);
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int index = findOrInsertNodeForCEs(strength);
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long node = nodes.elementAti(index);
127f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        // If the index is for a "weaker" node,
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // then skip backwards over this and further "weaker" nodes.
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(strengthFromNode(node) > strength) {
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            index = previousIndexFromNode(node);
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            node = nodes.elementAti(index);
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Find or insert a node whose index we will put into a temporary CE.
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(strengthFromNode(node) == strength && isTailoredNode(node)) {
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Reset to just before this same-strength tailored node.
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            index = previousIndexFromNode(node);
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if(strength == Collator.PRIMARY) {
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // root primary node (has no previous index)
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long p = weight32FromNode(node);
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(p == 0) {
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new UnsupportedOperationException(
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        "reset primary-before ignorable not possible");
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(p <= rootElements.getFirstPrimary()) {
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // There is no primary gap between ignorables and the space-first-primary.
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new UnsupportedOperationException(
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        "reset primary-before first non-ignorable not supported");
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(p == Collation.FIRST_TRAILING_PRIMARY) {
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // We do not support tailoring to an unassigned-implicit CE.
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new UnsupportedOperationException(
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        "reset primary-before [first trailing] not supported");
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            p = rootElements.getPrimaryBefore(p, baseData.isCompressiblePrimary(p));
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            index = findOrInsertNodeForPrimary(p);
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Go to the last node in this list:
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Tailor after the last node between adjacent root nodes.
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for(;;) {
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                node = nodes.elementAti(index);
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int nextIndex = nextIndexFromNode(node);
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(nextIndex == 0) { break; }
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index = nextIndex;
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // &[before 2] or &[before 3]
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            index = findCommonNode(index, Collator.SECONDARY);
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(strength >= Collator.TERTIARY) {
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index = findCommonNode(index, Collator.TERTIARY);
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
171f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            // findCommonNode() stayed on the stronger node or moved to
172f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            // an explicit common-weight node of the reset-before strength.
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            node = nodes.elementAti(index);
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(strengthFromNode(node) == strength) {
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Found a same-strength node with an explicit weight.
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int weight16 = weight16FromNode(node);
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(weight16 == 0) {
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    throw new UnsupportedOperationException(
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            (strength == Collator.SECONDARY) ?
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    "reset secondary-before secondary ignorable not possible" :
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    "reset tertiary-before completely ignorable not possible");
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
183f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                assert(weight16 > Collation.BEFORE_WEIGHT16);
184f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                // Reset to just before this node.
185f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                // Insert the preceding same-level explicit weight if it is not there already.
186f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                // Which explicit weight immediately precedes this one?
187f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                weight16 = getWeight16Before(index, node, strength);
188f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                // Does this preceding weight have a node?
189f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                int previousWeight16;
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int previousIndex = previousIndexFromNode(node);
191f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                for(int i = previousIndex;; i = previousIndexFromNode(node)) {
192f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    node = nodes.elementAti(i);
193f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    int previousStrength = strengthFromNode(node);
194f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    if(previousStrength < strength) {
195f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                        assert(weight16 >= Collation.COMMON_WEIGHT16 || i == previousIndex);
196f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                        // Either the reset element has an above-common weight and
197f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                        // the parent node provides the implied common weight,
198f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                        // or the reset element has a weight<=common in the node
199f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                        // right after the parent, and we need to insert the preceding weight.
200f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                        previousWeight16 = Collation.COMMON_WEIGHT16;
201f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                        break;
202f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    } else if(previousStrength == strength && !isTailoredNode(node)) {
203f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                        previousWeight16 = weight16FromNode(node);
204f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                        break;
205f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    }
206f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    // Skip weaker nodes and same-level tailored nodes.
207f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                }
208f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                if(previousWeight16 == weight16) {
209f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    // The preceding weight has a node,
210f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    // maybe with following weaker or tailored nodes.
211f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    // Reset to the last of them.
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    index = previousIndex;
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
214f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    // Insert a node with the preceding weight, reset to that.
215f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    node = nodeFromWeight16(weight16) | nodeFromStrength(strength);
216f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    index = insertNodeBetween(previousIndex, index, node);
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Found a stronger node with implied strength-common weight.
220f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                int weight16 = getWeight16Before(index, node, strength);
221f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                index = findOrInsertWeakNode(index, weight16, strength);
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Strength of the temporary CE = strength of its reset position.
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Code above raises an error if the before-strength is stronger.
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            strength = ceStrength(ces[cesLength - 1]);
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ces[cesLength - 1] = tempCEFromIndexAndStrength(index, strength);
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
230f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert    /**
231f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     * Returns the secondary or tertiary weight preceding the current node's weight.
232f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     * node=nodes[index].
233f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     */
234f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert    private int getWeight16Before(int index, long node, int level) {
235f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        assert(strengthFromNode(node) < level || !isTailoredNode(node));
236f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        // Collect the root CE weights if this node is for a root CE.
237f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        // If it is not, then return the low non-primary boundary for a tailored CE.
238f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        int t;
239f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        if(strengthFromNode(node) == Collator.TERTIARY) {
240f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            t = weight16FromNode(node);
241f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        } else {
242f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            t = Collation.COMMON_WEIGHT16;  // Stronger node with implied common weight.
243f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        }
244f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        while(strengthFromNode(node) > Collator.SECONDARY) {
245f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            index = previousIndexFromNode(node);
246f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            node = nodes.elementAti(index);
247f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        }
248f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        if(isTailoredNode(node)) {
249f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            return Collation.BEFORE_WEIGHT16;
250f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        }
251f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        int s;
252f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        if(strengthFromNode(node) == Collator.SECONDARY) {
253f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            s = weight16FromNode(node);
254f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        } else {
255f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            s = Collation.COMMON_WEIGHT16;  // Stronger node with implied common weight.
256f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        }
257f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        while(strengthFromNode(node) > Collator.PRIMARY) {
258f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            index = previousIndexFromNode(node);
259f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            node = nodes.elementAti(index);
260f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        }
261f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        if(isTailoredNode(node)) {
262f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            return Collation.BEFORE_WEIGHT16;
263f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        }
264f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        // [p, s, t] is a root CE. Return the preceding weight for the requested level.
265f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        long p = weight32FromNode(node);
266f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        int weight16;
267f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        if(level == Collator.SECONDARY) {
268f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            weight16 = rootElements.getSecondaryBefore(p, s);
269f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        } else {
270f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            weight16 = rootElements.getTertiaryBefore(p, s, t);
271f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            assert((weight16 & ~Collation.ONLY_TERTIARY_MASK) == 0);
272f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        }
273f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        return weight16;
274f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert    }
275f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private long getSpecialResetPosition(CharSequence str) {
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(str.length() == 2);
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long ce;
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int strength = Collator.PRIMARY;
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean isBoundary = false;
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CollationRuleParser.Position pos =
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                CollationRuleParser.POSITION_VALUES[str.charAt(1) - CollationRuleParser.POS_BASE];
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        switch(pos) {
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case FIRST_TERTIARY_IGNORABLE:
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Quaternary CEs are not supported.
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Non-zero quaternary weights are possible only on tertiary or stronger CEs.
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return 0;
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case LAST_TERTIARY_IGNORABLE:
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return 0;
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case FIRST_SECONDARY_IGNORABLE: {
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Look for a tailored tertiary node after [0, 0, 0].
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int index = findOrInsertNodeForRootCE(0, Collator.TERTIARY);
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long node = nodes.elementAti(index);
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if((index = nextIndexFromNode(node)) != 0) {
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                node = nodes.elementAti(index);
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                assert(strengthFromNode(node) <= Collator.TERTIARY);
2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(isTailoredNode(node) && strengthFromNode(node) == Collator.TERTIARY) {
2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return tempCEFromIndexAndStrength(index, Collator.TERTIARY);
2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return rootElements.getFirstTertiaryCE();
3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // No need to look for nodeHasAnyBefore() on a tertiary node.
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case LAST_SECONDARY_IGNORABLE:
3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ce = rootElements.getLastTertiaryCE();
3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            strength = Collator.TERTIARY;
3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            break;
3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case FIRST_PRIMARY_IGNORABLE: {
3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Look for a tailored secondary node after [0, 0, *].
3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int index = findOrInsertNodeForRootCE(0, Collator.SECONDARY);
3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long node = nodes.elementAti(index);
3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while((index = nextIndexFromNode(node)) != 0) {
3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                node = nodes.elementAti(index);
3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                strength = strengthFromNode(node);
3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(strength < Collator.SECONDARY) { break; }
3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(strength == Collator.SECONDARY) {
3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(isTailoredNode(node)) {
3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if(nodeHasBefore3(node)) {
3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            index = nextIndexFromNode(nodes.elementAti(nextIndexFromNode(node)));
3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            assert(isTailoredNode(nodes.elementAti(index)));
3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return tempCEFromIndexAndStrength(index, Collator.SECONDARY);
3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else {
3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        break;
3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ce = rootElements.getFirstSecondaryCE();
3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            strength = Collator.SECONDARY;
3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            break;
3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case LAST_PRIMARY_IGNORABLE:
3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ce = rootElements.getLastSecondaryCE();
3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            strength = Collator.SECONDARY;
3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            break;
3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case FIRST_VARIABLE:
3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ce = rootElements.getFirstPrimaryCE();
3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            isBoundary = true;  // FractionalUCA.txt: FDD1 00A0, SPACE first primary
3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            break;
3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case LAST_VARIABLE:
3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ce = rootElements.lastCEWithPrimaryBefore(variableTop + 1);
3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            break;
3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case FIRST_REGULAR:
3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ce = rootElements.firstCEWithPrimaryAtLeast(variableTop + 1);
3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            isBoundary = true;  // FractionalUCA.txt: FDD1 263A, SYMBOL first primary
3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            break;
3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case LAST_REGULAR:
3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Use the Hani-first-primary rather than the actual last "regular" CE before it,
3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // for backward compatibility with behavior before the introduction of
3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // script-first-primary CEs in the root collator.
3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ce = rootElements.firstCEWithPrimaryAtLeast(
3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                baseData.getFirstPrimaryForGroup(UScript.HAN));
3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            break;
3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case FIRST_IMPLICIT:
3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ce = baseData.getSingleCE(0x4e00);
3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            break;
3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case LAST_IMPLICIT:
3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // We do not support tailoring to an unassigned-implicit CE.
3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new UnsupportedOperationException(
3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    "reset to [last implicit] not supported");
3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case FIRST_TRAILING:
3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ce = Collation.makeCE(Collation.FIRST_TRAILING_PRIMARY);
3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            isBoundary = true;  // trailing first primary (there is no mapping for it)
3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            break;
3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        case LAST_TRAILING:
3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new IllegalArgumentException("LDML forbids tailoring to U+FFFF");
3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        default:
3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assert(false);
3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return 0;
3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int index = findOrInsertNodeForRootCE(ce, strength);
3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long node = nodes.elementAti(index);
3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if((pos.ordinal() & 1) == 0) {
3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // even pos = [first xyz]
3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(!nodeHasAnyBefore(node) && isBoundary) {
3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // A <group> first primary boundary is artificially added to FractionalUCA.txt.
3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // It is reachable via its special contraction, but is not normally used.
3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Find the first character tailored after the boundary CE,
3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // or the first real root CE after it.
3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if((index = nextIndexFromNode(node)) != 0) {
3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // If there is a following node, then it must be tailored
3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // because there are no root CEs with a boundary primary
3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // and non-common secondary/tertiary weights.
3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    node = nodes.elementAti(index);
3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    assert(isTailoredNode(node));
3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ce = tempCEFromIndexAndStrength(index, strength);
3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    assert(strength == Collator.PRIMARY);
3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    long p = ce >>> 32;
3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    int pIndex = rootElements.findPrimary(p);
3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    boolean isCompressible = baseData.isCompressiblePrimary(p);
3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    p = rootElements.getPrimaryAfter(p, pIndex, isCompressible);
3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ce = Collation.makeCE(p);
3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    index = findOrInsertNodeForRootCE(ce, Collator.PRIMARY);
3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    node = nodes.elementAti(index);
3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(nodeHasAnyBefore(node)) {
4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Get the first node that was tailored before this one at a weaker strength.
4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(nodeHasBefore2(node)) {
4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    index = nextIndexFromNode(nodes.elementAti(nextIndexFromNode(node)));
4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    node = nodes.elementAti(index);
4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(nodeHasBefore3(node)) {
4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    index = nextIndexFromNode(nodes.elementAti(nextIndexFromNode(node)));
4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
4087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                assert(isTailoredNode(nodes.elementAti(index)));
4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ce = tempCEFromIndexAndStrength(index, strength);
4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
4127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // odd pos = [last xyz]
4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Find the last node that was tailored after the [last xyz]
4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // at a strength no greater than the position's strength.
4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for(;;) {
4167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int nextIndex = nextIndexFromNode(node);
4177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(nextIndex == 0) { break; }
4187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                long nextNode = nodes.elementAti(nextIndex);
4197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(strengthFromNode(nextNode) < strength) { break; }
4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index = nextIndex;
4217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                node = nextNode;
4227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Do not make a temporary CE for a root node.
4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // This last node might be the node for the root CE itself,
4257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // or a node with a common secondary or tertiary weight.
4267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(isTailoredNode(node)) {
4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ce = tempCEFromIndexAndStrength(index, strength);
4287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ce;
4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Implements CollationRuleParser.Sink. */
4347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Java 6: @Override
4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    void addRelation(int strength, CharSequence prefix, CharSequence str, CharSequence extension) {
4367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String nfdPrefix;
4377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(prefix.length() == 0) {
4387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            nfdPrefix = "";
4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
4407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            nfdPrefix = nfd.normalize(prefix);
4417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String nfdString = nfd.normalize(str);
4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // The runtime code decomposes Hangul syllables on the fly,
4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // with recursive processing but without making the Jamo pieces visible for matching.
4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // It does not work with certain types of contextual mappings.
4477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int nfdLength = nfdString.length();
4487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(nfdLength >= 2) {
4497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            char c = nfdString.charAt(0);
4507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(Hangul.isJamoL(c) || Hangul.isJamoV(c)) {
4517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // While handling a Hangul syllable, contractions starting with Jamo L or V
4527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // would not see the following Jamo of that syllable.
4537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new UnsupportedOperationException(
4547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        "contractions starting with conjoining Jamo L or V not supported");
4557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            c = nfdString.charAt(nfdLength - 1);
4577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(Hangul.isJamoL(c) ||
4587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    (Hangul.isJamoV(c) && Hangul.isJamoL(nfdString.charAt(nfdLength - 2)))) {
4597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // A contraction ending with Jamo L or L+V would require
4607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // generating Hangul syllables in addTailComposites() (588 for a Jamo L),
4617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // or decomposing a following Hangul syllable on the fly, during contraction matching.
4627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new UnsupportedOperationException(
4637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        "contractions ending with conjoining Jamo L or L+V not supported");
4647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // A Hangul syllable completely inside a contraction is ok.
4667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Note: If there is a prefix, then the parser checked that
4687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // both the prefix and the string beging with NFC boundaries (not Jamo V or T).
4697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Therefore: prefix.isEmpty() || !isJamoVOrT(nfdString.charAt(0))
4707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // (While handling a Hangul syllable, prefixes on Jamo V or T
4717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // would not see the previous Jamo of that syllable.)
4727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(strength != Collator.IDENTICAL) {
4747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Find the node index after which we insert the new tailored node.
4757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int index = findOrInsertNodeForCEs(strength);
4767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assert(cesLength > 0);
4777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long ce = ces[cesLength - 1];
4787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(strength == Collator.PRIMARY && !isTempCE(ce) && (ce >>> 32) == 0) {
4797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // There is no primary gap between ignorables and the space-first-primary.
4807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new UnsupportedOperationException(
4817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        "tailoring primary after ignorables not supported");
4827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(strength == Collator.QUATERNARY && ce == 0) {
4847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The CE data structure does not support non-zero quaternary weights
4857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // on tertiary ignorables.
4867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new UnsupportedOperationException(
4877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        "tailoring quaternary after tertiary ignorables not supported");
4887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Insert the new tailored node.
4907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            index = insertTailoredNodeAfter(index, strength);
4917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Strength of the temporary CE:
4927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // The new relation may yield a stronger CE but not a weaker one.
4937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int tempStrength = ceStrength(ce);
4947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(strength < tempStrength) { tempStrength = strength; }
4957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ces[cesLength - 1] = tempCEFromIndexAndStrength(index, tempStrength);
4967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        setCaseBits(nfdString);
4997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int cesLengthBeforeExtension = cesLength;
5017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(extension.length() != 0) {
5027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String nfdExtension = nfd.normalize(extension);
5037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            cesLength = dataBuilder.getCEs(nfdExtension, ces, cesLength);
5047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(cesLength > Collation.MAX_EXPANSION_LENGTH) {
5057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new IllegalArgumentException(
5067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        "extension string adds too many collation elements (more than 31 total)");
5077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int ce32 = Collation.UNASSIGNED_CE32;
5107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if((!nfdPrefix.contentEquals(prefix) || !nfdString.contentEquals(str)) &&
5117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                !ignorePrefix(prefix) && !ignoreString(str)) {
5127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Map from the original input to the CEs.
5137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // We do this in case the canonical closure is incomplete,
5147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // so that it is possible to explicitly provide the missing mappings.
5157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ce32 = addIfDifferent(prefix, str, ces, cesLength, ce32);
5167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        addWithClosure(nfdPrefix, nfdString, ces, cesLength, ce32);
5187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        cesLength = cesLengthBeforeExtension;
5197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Picks one of the current CEs and finds or inserts a node in the graph
5237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * for the CE + strength.
5247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int findOrInsertNodeForCEs(int strength) {
5267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(Collator.PRIMARY <= strength && strength <= Collator.QUATERNARY);
5277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Find the last CE that is at least as "strong" as the requested difference.
5297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Note: Stronger is smaller (Collator.PRIMARY=0).
5307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long ce;
5317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;; --cesLength) {
5327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(cesLength == 0) {
5337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ce = ces[0] = 0;
5347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                cesLength = 1;
5357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
5367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
5377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ce = ces[cesLength - 1];
5387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(ceStrength(ce) <= strength) { break; }
5407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(isTempCE(ce)) {
5437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // No need to findCommonNode() here for lower levels
5447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // because insertTailoredNodeAfter() will do that anyway.
5457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return indexFromTempCE(ce);
5467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // root CE
5497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if((int)(ce >>> 56) == Collation.UNASSIGNED_IMPLICIT_BYTE) {
5507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            throw new UnsupportedOperationException(
5517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    "tailoring relative to an unassigned code point not supported");
5527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return findOrInsertNodeForRootCE(ce, strength);
5547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int findOrInsertNodeForRootCE(long ce, int strength) {
5577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert((int)(ce >>> 56) != Collation.UNASSIGNED_IMPLICIT_BYTE);
5587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Find or insert the node for each of the root CE's weights,
5607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // down to the requested level/strength.
5617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Root CEs must have common=zero quaternary weights (for which we never insert any nodes).
5627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert((ce & 0xc0) == 0);
563f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        int index = findOrInsertNodeForPrimary(ce >>> 32);
5647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(strength >= Collator.SECONDARY) {
5657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int lower32 = (int)ce;
5667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            index = findOrInsertWeakNode(index, lower32 >>> 16, Collator.SECONDARY);
5677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(strength >= Collator.TERTIARY) {
5687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index = findOrInsertWeakNode(index, lower32 & Collation.ONLY_TERTIARY_MASK,
5697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                            Collator.TERTIARY);
5707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return index;
5737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Like Java Collections.binarySearch(List, key, Comparator).
5777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the index>=0 where the item was found,
5797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *         or the index<0 for inserting the string at ~index in sorted order
5807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *         (index into rootPrimaryIndexes)
5817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int binarySearchForRootPrimaryNode(
5837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int[] rootPrimaryIndexes, int length, long[] nodes, long p) {
5847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(length == 0) { return ~0; }
5857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int start = 0;
5867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int limit = length;
5877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (;;) {
5887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int i = (start + limit) / 2;
5897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long node = nodes[rootPrimaryIndexes[i]];
5907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long nodePrimary = node >>> 32;  // weight32FromNode(node)
5917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (p == nodePrimary) {
5927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return i;
5937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if (p < nodePrimary) {
5947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (i == start) {
5957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return ~start;  // insert s before i
5967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
5977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                limit = i;
5987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
5997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (i == start) {
6007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return ~(start + 1);  // insert s after i
6017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                start = i;
6037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
6047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Finds or inserts the node for a root CE's primary weight. */
6087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int findOrInsertNodeForPrimary(long p) {
6097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int rootIndex = binarySearchForRootPrimaryNode(
6107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            rootPrimaryIndexes.getBuffer(), rootPrimaryIndexes.size(), nodes.getBuffer(), p);
6117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(rootIndex >= 0) {
6127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return rootPrimaryIndexes.elementAti(rootIndex);
6137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
6147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Start a new list of nodes with this primary.
6157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int index = nodes.size();
6167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            nodes.addElement(nodeFromWeight32(p));
6177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            rootPrimaryIndexes.insertElementAt(index, ~rootIndex);
6187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return index;
6197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Finds or inserts the node for a secondary or tertiary weight. */
6237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int findOrInsertWeakNode(int index, int weight16, int level) {
6247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(0 <= index && index < nodes.size());
625f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        assert(Collator.SECONDARY <= level && level <= Collator.TERTIARY);
6267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(weight16 == Collation.COMMON_WEIGHT16) {
6287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return findCommonNode(index, level);
6297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
630f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert
631f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        // If this will be the first below-common weight for the parent node,
632f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        // then we will also need to insert a common weight after it.
633f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        long node = nodes.elementAti(index);
634f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        assert(strengthFromNode(node) < level);  // parent node is stronger
635f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        if(weight16 != 0 && weight16 < Collation.COMMON_WEIGHT16) {
636f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            int hasThisLevelBefore = level == Collator.SECONDARY ? HAS_BEFORE2 : HAS_BEFORE3;
637f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            if((node & hasThisLevelBefore) == 0) {
638f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                // The parent node has an implied level-common weight.
639f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                long commonNode =
640f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    nodeFromWeight16(Collation.COMMON_WEIGHT16) | nodeFromStrength(level);
641f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                if(level == Collator.SECONDARY) {
642f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    // Move the HAS_BEFORE3 flag from the parent node
643f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    // to the new secondary common node.
644f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    commonNode |= node & HAS_BEFORE3;
645f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    node &= ~(long)HAS_BEFORE3;
646f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                }
647f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                nodes.setElementAt(node | hasThisLevelBefore, index);
648f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                // Insert below-common-weight node.
649f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                int nextIndex = nextIndexFromNode(node);
650f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                node = nodeFromWeight16(weight16) | nodeFromStrength(level);
651f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                index = insertNodeBetween(index, nextIndex, node);
652f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                // Insert common-weight node.
653f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                insertNodeBetween(index, nextIndex, commonNode);
654f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                // Return index of below-common-weight node.
655f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                return index;
656f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            }
657f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        }
658f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert
6597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Find the root CE's weight for this level.
6607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Postpone insertion if not found:
6617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Insert the new root node before the next stronger node,
6627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // or before the next root node with the same strength and a larger weight.
6637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int nextIndex;
6647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while((nextIndex = nextIndexFromNode(node)) != 0) {
6657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            node = nodes.elementAti(nextIndex);
6667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int nextStrength = strengthFromNode(node);
6677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(nextStrength <= level) {
6687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Insert before a stronger node.
6697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(nextStrength < level) { break; }
6707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // nextStrength == level
6717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(!isTailoredNode(node)) {
6727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    int nextWeight16 = weight16FromNode(node);
6737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(nextWeight16 == weight16) {
6747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // Found the node for the root CE up to this level.
6757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return nextIndex;
6767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
6777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Insert before a node with a larger same-strength weight.
6787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(nextWeight16 > weight16) { break; }
6797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
6807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
6817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Skip the next node.
6827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            index = nextIndex;
6837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        node = nodeFromWeight16(weight16) | nodeFromStrength(level);
6857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return insertNodeBetween(index, nextIndex, node);
6867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
6897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Makes and inserts a new tailored node into the list, after the one at index.
6907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Skips over nodes of weaker strength to maintain collation order
6917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * ("postpone insertion").
6927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the new node's index
6937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
6947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int insertTailoredNodeAfter(int index, int strength) {
6957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(0 <= index && index < nodes.size());
6967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(strength >= Collator.SECONDARY) {
6977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            index = findCommonNode(index, Collator.SECONDARY);
6987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(strength >= Collator.TERTIARY) {
6997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index = findCommonNode(index, Collator.TERTIARY);
7007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
7017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Postpone insertion:
7037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Insert the new node before the next one with a strength at least as strong.
7047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long node = nodes.elementAti(index);
7057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int nextIndex;
7067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while((nextIndex = nextIndexFromNode(node)) != 0) {
7077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            node = nodes.elementAti(nextIndex);
7087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(strengthFromNode(node) <= strength) { break; }
7097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Skip the next node which has a weaker (larger) strength than the new one.
7107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            index = nextIndex;
7117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        node = IS_TAILORED | nodeFromStrength(strength);
7137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return insertNodeBetween(index, nextIndex, node);
7147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Inserts a new node into the list, between list-adjacent items.
7187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The node's previous and next indexes must not be set yet.
7197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the new node's index
7207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int insertNodeBetween(int index, int nextIndex, long node) {
7227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(previousIndexFromNode(node) == 0);
7237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(nextIndexFromNode(node) == 0);
7247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(nextIndexFromNode(nodes.elementAti(index)) == nextIndex);
7257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Append the new node and link it to the existing nodes.
7267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int newIndex = nodes.size();
7277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        node |= nodeFromPreviousIndex(index) | nodeFromNextIndex(nextIndex);
7287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        nodes.addElement(node);
7297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // nodes[index].nextIndex = newIndex
7307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        node = nodes.elementAti(index);
7317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        nodes.setElementAt(changeNodeNextIndex(node, newIndex), index);
7327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // nodes[nextIndex].previousIndex = newIndex
7337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(nextIndex != 0) {
7347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            node = nodes.elementAti(nextIndex);
7357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            nodes.setElementAt(changeNodePreviousIndex(node, newIndex), nextIndex);
7367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return newIndex;
7387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
7417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Finds the node which implies or contains a common=05 weight of the given strength
742f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     * (secondary or tertiary), if the current node is stronger.
7437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Skips weaker nodes and tailored nodes if the current node is stronger
7447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * and is followed by an explicit-common-weight node.
7457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Always returns the input index if that node is no stronger than the given strength.
7467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
7477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int findCommonNode(int index, int strength) {
7487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(Collator.SECONDARY <= strength && strength <= Collator.TERTIARY);
7497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long node = nodes.elementAti(index);
7507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(strengthFromNode(node) >= strength) {
7517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // The current node is no stronger.
7527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return index;
7537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(strength == Collator.SECONDARY ? !nodeHasBefore2(node) : !nodeHasBefore3(node)) {
7557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // The current node implies the strength-common weight.
7567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return index;
7577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        index = nextIndexFromNode(node);
7597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        node = nodes.elementAti(index);
7607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(!isTailoredNode(node) && strengthFromNode(node) == strength &&
761f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                weight16FromNode(node) < Collation.COMMON_WEIGHT16);
7627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Skip to the explicit common node.
7637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        do {
7647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            index = nextIndexFromNode(node);
7657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            node = nodes.elementAti(index);
7667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assert(strengthFromNode(node) >= strength);
767f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        } while(isTailoredNode(node) || strengthFromNode(node) > strength ||
768f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                weight16FromNode(node) < Collation.COMMON_WEIGHT16);
7697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(weight16FromNode(node) == Collation.COMMON_WEIGHT16);
7707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return index;
7717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
7727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void setCaseBits(CharSequence nfdString) {
7747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int numTailoredPrimaries = 0;
7757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(int i = 0; i < cesLength; ++i) {
7767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(ceStrength(ces[i]) == Collator.PRIMARY) { ++numTailoredPrimaries; }
7777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
7787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // We should not be able to get too many case bits because
7797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // cesLength<=31==MAX_EXPANSION_LENGTH.
7807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // 31 pairs of case bits fit into an long without setting its sign bit.
7817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(numTailoredPrimaries <= 31);
7827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long cases = 0;
7847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(numTailoredPrimaries > 0) {
7857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CharSequence s = nfdString;
7867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            UTF16CollationIterator baseCEs = new UTF16CollationIterator(baseData, false, s, 0);
7877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int baseCEsLength = baseCEs.fetchCEs() - 1;
7887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assert(baseCEsLength >= 0 && baseCEs.getCE(baseCEsLength) == Collation.NO_CE);
7897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
7907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int lastCase = 0;
7917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int numBasePrimaries = 0;
7927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for(int i = 0; i < baseCEsLength; ++i) {
7937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                long ce = baseCEs.getCE(i);
7947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if((ce >>> 32) != 0) {
7957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ++numBasePrimaries;
7967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    int c = ((int)ce >> 14) & 3;
7977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    assert(c == 0 || c == 2);  // lowercase or uppercase, no mixed case in any base CE
7987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(numBasePrimaries < numTailoredPrimaries) {
7997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        cases |= (long)c << ((numBasePrimaries - 1) * 2);
8007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else if(numBasePrimaries == numTailoredPrimaries) {
8017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        lastCase = c;
8027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else if(c != lastCase) {
8037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // There are more base primary CEs than tailored primaries.
8047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // Set mixed case if the case bits of the remainder differ.
8057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        lastCase = 1;
8067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // Nothing more can change.
8077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        break;
8087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
8097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
8107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
8117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(numBasePrimaries >= numTailoredPrimaries) {
8127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                cases |= (long)lastCase << ((numTailoredPrimaries - 1) * 2);
8137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
8147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(int i = 0; i < cesLength; ++i) {
8177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long ce = ces[i] & 0xffffffffffff3fffL;  // clear old case bits
8187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int strength = ceStrength(ce);
8197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(strength == Collator.PRIMARY) {
8207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ce |= (cases & 3) << 14;
8217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                cases >>>= 2;
8227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(strength == Collator.TERTIARY) {
8237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Tertiary CEs must have uppercase bits.
8247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // See the LDML spec, and comments in class CollationCompare.
8257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ce |= 0x8000;
8267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
8277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Tertiary ignorable CEs must have 0 case bits.
8287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // We set 0 case bits for secondary CEs too
8297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // since currently only U+0345 is cased and maps to a secondary CE,
8307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // and it is lowercase. Other secondaries are uncased.
8317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // See [[:Cased:]&[:uca1=:]] where uca1 queries the root primary weight.
8327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ces[i] = ce;
8337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Implements CollationRuleParser.Sink. */
8377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
8387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    void suppressContractions(UnicodeSet set) {
8397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        dataBuilder.suppressContractions(set);
8407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Implements CollationRuleParser.Sink. */
8437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    @Override
8447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    void optimize(UnicodeSet set) {
8457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        optimizeSet.addAll(set);
8467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
8497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Adds the mapping and its canonical closure.
8507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Takes ce32=dataBuilder.encodeCEs(...) so that the data builder
8517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * need not re-encode the CEs multiple times.
8527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
8537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int addWithClosure(CharSequence nfdPrefix, CharSequence nfdString,
8547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                long[] newCEs, int newCEsLength, int ce32) {
8557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Map from the NFD input to the CEs.
8567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ce32 = addIfDifferent(nfdPrefix, nfdString, newCEs, newCEsLength, ce32);
8577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ce32 = addOnlyClosure(nfdPrefix, nfdString, newCEs, newCEsLength, ce32);
8587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        addTailComposites(nfdPrefix, nfdString);
8597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ce32;
8607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int addOnlyClosure(CharSequence nfdPrefix, CharSequence nfdString,
8637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                long[] newCEs, int newCEsLength, int ce32) {
8647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Map from canonically equivalent input to the CEs. (But not from the all-NFD input.)
8657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // TODO: make CanonicalIterator work with CharSequence, or maybe change arguments here to String
8667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(nfdPrefix.length() == 0) {
8677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CanonicalIterator stringIter = new CanonicalIterator(nfdString.toString());
8687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String prefix = "";
8697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for(;;) {
8707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                String str = stringIter.next();
8717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(str == null) { break; }
8727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(ignoreString(str) || str.contentEquals(nfdString)) { continue; }
8737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ce32 = addIfDifferent(prefix, str, newCEs, newCEsLength, ce32);
8747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
8757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
8767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CanonicalIterator prefixIter = new CanonicalIterator(nfdPrefix.toString());
8777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CanonicalIterator stringIter = new CanonicalIterator(nfdString.toString());
8787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for(;;) {
8797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                String prefix = prefixIter.next();
8807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(prefix == null) { break; }
8817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(ignorePrefix(prefix)) { continue; }
8827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                boolean samePrefix = prefix.contentEquals(nfdPrefix);
8837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                for(;;) {
8847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    String str = stringIter.next();
8857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(str == null) { break; }
8867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(ignoreString(str) || (samePrefix && str.contentEquals(nfdString))) { continue; }
8877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ce32 = addIfDifferent(prefix, str, newCEs, newCEsLength, ce32);
8887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
8897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                stringIter.reset();
8907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
8917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
8927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ce32;
8937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
8947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
8957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void addTailComposites(CharSequence nfdPrefix, CharSequence nfdString) {
8967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Look for the last starter in the NFD string.
8977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int lastStarter;
8987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int indexAfterLastStarter = nfdString.length();
8997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
9007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(indexAfterLastStarter == 0) { return; }  // no starter at all
9017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            lastStarter = Character.codePointBefore(nfdString, indexAfterLastStarter);
9027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(nfd.getCombiningClass(lastStarter) == 0) { break; }
9037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            indexAfterLastStarter -= Character.charCount(lastStarter);
9047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // No closure to Hangul syllables since we decompose them on the fly.
9067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(Hangul.isJamoL(lastStarter)) { return; }
9077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Are there any composites whose decomposition starts with the lastStarter?
9097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Note: Normalizer2Impl does not currently return start sets for NFC_QC=Maybe characters.
9107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // We might find some more equivalent mappings here if it did.
9117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UnicodeSet composites = new UnicodeSet();
9127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(!nfcImpl.getCanonStartSet(lastStarter, composites)) { return; }
9137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuilder newNFDString = new StringBuilder(), newString = new StringBuilder();
9157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long[] newCEs = new long[Collation.MAX_EXPANSION_LENGTH];
9167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UnicodeSetIterator iter = new UnicodeSetIterator(composites);
9177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(iter.next()) {
9187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assert(iter.codepoint != UnicodeSetIterator.IS_STRING);
9197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int composite = iter.codepoint;
9207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String decomp = nfd.getDecomposition(composite);
9217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(!mergeCompositeIntoString(nfdString, indexAfterLastStarter, composite, decomp,
9227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    newNFDString, newString)) {
9237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                continue;
9247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
9257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int newCEsLength = dataBuilder.getCEs(nfdPrefix, newNFDString, newCEs, 0);
9267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(newCEsLength > Collation.MAX_EXPANSION_LENGTH) {
9277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Ignore mappings that we cannot store.
9287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                continue;
9297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
9307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Note: It is possible that the newCEs do not make use of the mapping
9317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // for which we are adding the tail composites, in which case we might be adding
9327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // unnecessary mappings.
9337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // For example, when we add tail composites for ae^ (^=combining circumflex),
9347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // UCA discontiguous-contraction matching does not find any matches
9357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // for ae_^ (_=any combining diacritic below) *unless* there is also
9367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // a contraction mapping for ae.
9377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Thus, if there is no ae contraction, then the ae^ mapping is ignored
9387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // while fetching the newCEs for ae_^.
9397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // TODO: Try to detect this effectively.
9407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // (Alternatively, print a warning when prefix contractions are missing.)
9417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // We do not need an explicit mapping for the NFD strings.
9437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // It is fine if the NFD input collates like this via a sequence of mappings.
9447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // It also saves a little bit of space, and may reduce the set of characters with contractions.
9457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int ce32 = addIfDifferent(nfdPrefix, newString,
9467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                          newCEs, newCEsLength, Collation.UNASSIGNED_CE32);
9477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(ce32 != Collation.UNASSIGNED_CE32) {
9487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // was different, was added
9497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                addOnlyClosure(nfdPrefix, newNFDString, newCEs, newCEsLength, ce32);
9507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
9517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
9537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean mergeCompositeIntoString(CharSequence nfdString, int indexAfterLastStarter,
9557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int composite, CharSequence decomp,
9567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                StringBuilder newNFDString, StringBuilder newString) {
9577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(Character.codePointBefore(nfdString, indexAfterLastStarter) ==
9587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Character.codePointAt(decomp, 0));
9597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int lastStarterLength = Character.offsetByCodePoints(decomp, 0, 1);
9607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(lastStarterLength == decomp.length()) {
9617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Singleton decompositions should be found by addWithClosure()
9627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // and the CanonicalIterator, so we can ignore them here.
9637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return false;
9647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(equalSubSequences(nfdString, indexAfterLastStarter, decomp, lastStarterLength)) {
9667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // same strings, nothing new to be found here
9677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return false;
9687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
9697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Make new FCD strings that combine a composite, or its decomposition,
9717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // into the nfdString's last starter and the combining marks following it.
9727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Make an NFD version, and a version with the composite.
9737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        newNFDString.setLength(0);
9747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        newNFDString.append(nfdString, 0, indexAfterLastStarter);
9757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        newString.setLength(0);
9767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        newString.append(nfdString, 0, indexAfterLastStarter - lastStarterLength)
9777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            .appendCodePoint(composite);
9787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
9797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // The following is related to discontiguous contraction matching,
9807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // but builds only FCD strings (or else returns false).
9817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int sourceIndex = indexAfterLastStarter;
9827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int decompIndex = lastStarterLength;
9837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Small optimization: We keep the source character across loop iterations
9847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // because we do not always consume it,
9857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // and then need not fetch it again nor look up its combining class again.
9867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int sourceChar = Collation.SENTINEL_CP;
9877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // The cc variables need to be declared before the loop so that at the end
9887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // they are set to the last combining classes seen.
9897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int sourceCC = 0;
9907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int decompCC = 0;
9917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
9927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(sourceChar < 0) {
9937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(sourceIndex >= nfdString.length()) { break; }
9947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                sourceChar = Character.codePointAt(nfdString, sourceIndex);
9957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                sourceCC = nfd.getCombiningClass(sourceChar);
9967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                assert(sourceCC != 0);
9977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
9987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // We consume a decomposition character in each iteration.
9997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(decompIndex >= decomp.length()) { break; }
10007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int decompChar = Character.codePointAt(decomp, decompIndex);
10017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            decompCC = nfd.getCombiningClass(decompChar);
10027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Compare the two characters and their combining classes.
10037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(decompCC == 0) {
10047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Unable to merge because the source contains a non-zero combining mark
10057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // but the composite's decomposition contains another starter.
10067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The strings would not be equivalent.
10077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return false;
10087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(sourceCC < decompCC) {
10097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Composite + sourceChar would not be FCD.
10107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return false;
10117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(decompCC < sourceCC) {
10127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                newNFDString.appendCodePoint(decompChar);
10137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                decompIndex += Character.charCount(decompChar);
10147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if(decompChar != sourceChar) {
10157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Blocked because same combining class.
10167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return false;
10177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {  // match: decompChar == sourceChar
10187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                newNFDString.appendCodePoint(decompChar);
10197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                decompIndex += Character.charCount(decompChar);
10207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                sourceIndex += Character.charCount(decompChar);
10217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                sourceChar = Collation.SENTINEL_CP;
10227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
10237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
10247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // We are at the end of at least one of the two inputs.
10257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(sourceChar >= 0) {  // more characters from nfdString but not from decomp
10267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(sourceCC < decompCC) {
10277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Appending the next source character to the composite would not be FCD.
10287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return false;
10297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
10307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            newNFDString.append(nfdString, sourceIndex, nfdString.length());
10317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            newString.append(nfdString, sourceIndex, nfdString.length());
10327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if(decompIndex < decomp.length()) {  // more characters from decomp, not from nfdString
10337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            newNFDString.append(decomp, decompIndex, decomp.length());
10347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
10357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(nfd.isNormalized(newNFDString));
10367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(fcd.isNormalized(newString));
10377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(nfd.normalize(newString).equals(newNFDString.toString()));  // canonically equivalent
10387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return true;
10397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
10407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean equalSubSequences(CharSequence left, int leftStart, CharSequence right, int rightStart) {
10427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // C++ UnicodeString::compare(leftStart, 0x7fffffff, right, rightStart, 0x7fffffff) == 0
10437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int leftLength = left.length();
10447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if((leftLength - leftStart) != (right.length() - rightStart)) { return false; }
10457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(leftStart < leftLength) {
10467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(left.charAt(leftStart++) != right.charAt(rightStart++)) {
10477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return false;
10487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
10497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
10507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return true;
10517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
10527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean ignorePrefix(CharSequence s) {
10537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Do not map non-FCD prefixes.
10547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return !isFCD(s);
10557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
10567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean ignoreString(CharSequence s) {
10577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Do not map non-FCD strings.
10587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Do not map strings that start with Hangul syllables: We decompose those on the fly.
10597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return !isFCD(s) || Hangul.isHangul(s.charAt(0));
10607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
10617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean isFCD(CharSequence s) {
10627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return fcd.isNormalized(s);
10637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
10647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final UnicodeSet COMPOSITES = new UnicodeSet("[:NFD_QC=N:]");
10667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static {
10677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Hangul is decomposed on the fly during collation.
10687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        COMPOSITES.remove(Hangul.HANGUL_BASE, Hangul.HANGUL_END);
10697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
10707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void closeOverComposites() {
10727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String prefix = "";  // empty
10737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        UnicodeSetIterator iter = new UnicodeSetIterator(COMPOSITES);
10747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(iter.next()) {
10757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assert(iter.codepoint != UnicodeSetIterator.IS_STRING);
10767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String nfdString = nfd.getDecomposition(iter.codepoint);
10777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            cesLength = dataBuilder.getCEs(nfdString, ces, 0);
10787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(cesLength > Collation.MAX_EXPANSION_LENGTH) {
10797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Too many CEs from the decomposition (unusual), ignore this composite.
10807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // We could add a capacity parameter to getCEs() and reallocate if necessary.
10817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // However, this can only really happen in contrived cases.
10827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                continue;
10837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
10847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            String composite = iter.getString();
10857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            addIfDifferent(prefix, composite, ces, cesLength, Collation.UNASSIGNED_CE32);
10867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
10877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
10887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
10897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int addIfDifferent(CharSequence prefix, CharSequence str,
10907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                long[] newCEs, int newCEsLength, int ce32) {
10917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long[] oldCEs = new long[Collation.MAX_EXPANSION_LENGTH];
10927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int oldCEsLength = dataBuilder.getCEs(prefix, str, oldCEs, 0);
10937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(!sameCEs(newCEs, newCEsLength, oldCEs, oldCEsLength)) {
10947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(ce32 == Collation.UNASSIGNED_CE32) {
10957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ce32 = dataBuilder.encodeCEs(newCEs, newCEsLength);
10967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
10977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            dataBuilder.addCE32(prefix, str, ce32);
10987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
10997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ce32;
11007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
11017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static boolean sameCEs(long[] ces1, int ces1Length,
11037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                long[] ces2, int ces2Length) {
11047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(ces1Length != ces2Length) {
11057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return false;
11067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
11077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(ces1Length <= Collation.MAX_EXPANSION_LENGTH);
11087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(int i = 0; i < ces1Length; ++i) {
11097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(ces1[i] != ces2[i]) { return false; }
11107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
11117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return true;
11127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
11137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int alignWeightRight(int w) {
11157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(w != 0) {
11167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while((w & 0xff) == 0) { w >>>= 8; }
11177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
11187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return w;
11197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
11207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
11227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Walks the tailoring graph and overwrites tailored nodes with new CEs.
11237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * After this, the graph is destroyed.
11247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The nodes array can then be used only as a source of tailored CEs.
11257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
11267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void makeTailoredCEs() {
11277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CollationWeights primaries = new CollationWeights();
11287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CollationWeights secondaries = new CollationWeights();
11297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CollationWeights tertiaries = new CollationWeights();
11307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long[] nodesArray = nodes.getBuffer();
1131f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        if(DEBUG) {
1132f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            System.out.println("\nCollationBuilder.makeTailoredCEs()");
1133f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        }
11347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
11357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(int rpi = 0; rpi < rootPrimaryIndexes.size(); ++rpi) {
11367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int i = rootPrimaryIndexes.elementAti(rpi);
11377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long node = nodesArray[i];
11387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long p = weight32FromNode(node);
11397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int s = p == 0 ? 0 : Collation.COMMON_WEIGHT16;
11407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int t = s;
11417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int q = 0;
11427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            boolean pIsTailored = false;
11437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            boolean sIsTailored = false;
11447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            boolean tIsTailored = false;
11457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(DEBUG) {
11467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                System.out.printf("\nprimary     %x\n", alignWeightRight((int)p));
11477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
11487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int pIndex = p == 0 ? 0 : rootElements.findPrimary(p);
11497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int nextIndex = nextIndexFromNode(node);
11507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while(nextIndex != 0) {
11517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                i = nextIndex;
11527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                node = nodesArray[i];
11537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                nextIndex = nextIndexFromNode(node);
11547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int strength = strengthFromNode(node);
11557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(strength == Collator.QUATERNARY) {
11567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    assert(isTailoredNode(node));
11577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(DEBUG) {
11587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        System.out.print("      quat+     ");
11597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
11607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(q == 3) {
11617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        // C++ U_BUFFER_OVERFLOW_ERROR
11627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        throw new UnsupportedOperationException("quaternary tailoring gap too small");
11637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
11647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ++q;
11657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
11667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(strength == Collator.TERTIARY) {
11677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if(isTailoredNode(node)) {
11687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            if(DEBUG) {
11697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                System.out.print("    ter+        ");
11707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            }
11717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            if(!tIsTailored) {
11727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                // First tailored tertiary node for [p, s].
11737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                int tCount = countTailoredNodes(nodesArray, nextIndex,
11747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                                                    Collator.TERTIARY) + 1;
11757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                int tLimit;
11767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                if(t == 0) {
11777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    // Gap at the beginning of the tertiary CE range.
11787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    t = rootElements.getTertiaryBoundary() - 0x100;
11797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    tLimit = (int)rootElements.getFirstTertiaryCE() & Collation.ONLY_TERTIARY_MASK;
11807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                } else if(!pIsTailored && !sIsTailored) {
11817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    // p and s are root weights.
11827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    tLimit = rootElements.getTertiaryAfter(pIndex, s, t);
1183f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                                } else if(t == Collation.BEFORE_WEIGHT16) {
1184f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                                    tLimit = Collation.COMMON_WEIGHT16;
11857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                } else {
11867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    // [p, s] is tailored.
11877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    assert(t == Collation.COMMON_WEIGHT16);
11887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    tLimit = rootElements.getTertiaryBoundary();
11897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                }
11907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                assert(tLimit == 0x4000 || (tLimit & ~Collation.ONLY_TERTIARY_MASK) == 0);
11917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                tertiaries.initForTertiary();
11927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                if(!tertiaries.allocWeights(t, tLimit, tCount)) {
11937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    // C++ U_BUFFER_OVERFLOW_ERROR
11947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    throw new UnsupportedOperationException("tertiary tailoring gap too small");
11957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                }
11967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                tIsTailored = true;
11977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            }
11987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            t = (int)tertiaries.nextWeight();
11997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            assert(t != 0xffffffff);
12007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        } else {
12017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            t = weight16FromNode(node);
12027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            tIsTailored = false;
12037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            if(DEBUG) {
12047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                System.out.printf("    ter     %x\n", alignWeightRight(t));
12057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            }
12067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
12077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else {
12087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if(strength == Collator.SECONDARY) {
12097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            if(isTailoredNode(node)) {
12107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                if(DEBUG) {
12117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    System.out.print("  sec+          ");
12127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                }
12137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                if(!sIsTailored) {
12147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    // First tailored secondary node for p.
12157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    int sCount = countTailoredNodes(nodesArray, nextIndex,
12167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                                                        Collator.SECONDARY) + 1;
12177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    int sLimit;
12187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    if(s == 0) {
12197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                        // Gap at the beginning of the secondary CE range.
12207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                        s = rootElements.getSecondaryBoundary() - 0x100;
12217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                        sLimit = (int)(rootElements.getFirstSecondaryCE() >> 16);
12227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    } else if(!pIsTailored) {
12237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                        // p is a root primary.
12247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                        sLimit = rootElements.getSecondaryAfter(pIndex, s);
1225f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                                    } else if(s == Collation.BEFORE_WEIGHT16) {
1226f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                                        sLimit = Collation.COMMON_WEIGHT16;
12277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    } else {
12287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                        // p is a tailored primary.
12297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                        assert(s == Collation.COMMON_WEIGHT16);
12307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                        sLimit = rootElements.getSecondaryBoundary();
12317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    }
12327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    if(s == Collation.COMMON_WEIGHT16) {
12337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                        // Do not tailor into the getSortKey() range of
12347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                        // compressed common secondaries.
12357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                        s = rootElements.getLastCommonSecondary();
12367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    }
12377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    secondaries.initForSecondary();
12387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    if(!secondaries.allocWeights(s, sLimit, sCount)) {
12397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                        // C++ U_BUFFER_OVERFLOW_ERROR
1240f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                                        if(DEBUG) {
1241f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                                            System.out.printf("!secondaries.allocWeights(%x, %x, sCount=%d)\n",
1242f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                                                    alignWeightRight(s), alignWeightRight(sLimit),
1243f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                                                    alignWeightRight(sCount));
1244f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                                        }
12457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                        throw new UnsupportedOperationException("secondary tailoring gap too small");
12467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    }
12477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    sIsTailored = true;
12487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                }
12497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                s = (int)secondaries.nextWeight();
12507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                assert(s != 0xffffffff);
12517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            } else {
12527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                s = weight16FromNode(node);
12537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                sIsTailored = false;
12547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                if(DEBUG) {
12557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    System.out.printf("  sec       %x\n", alignWeightRight(s));
12567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                }
12577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            }
12587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        } else /* Collator.PRIMARY */ {
12597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            assert(isTailoredNode(node));
12607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            if(DEBUG) {
12617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                System.out.print("pri+            ");
12627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            }
12637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            if(!pIsTailored) {
12647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                // First tailored primary node in this list.
12657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                int pCount = countTailoredNodes(nodesArray, nextIndex,
12667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                                                    Collator.PRIMARY) + 1;
12677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                boolean isCompressible = baseData.isCompressiblePrimary(p);
12687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                long pLimit =
12697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    rootElements.getPrimaryAfter(p, pIndex, isCompressible);
12707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                primaries.initForPrimary(isCompressible);
12717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                if(!primaries.allocWeights(p, pLimit, pCount)) {
12727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    // C++ U_BUFFER_OVERFLOW_ERROR  // TODO: introduce a more specific UErrorCode?
12737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                    throw new UnsupportedOperationException("primary tailoring gap too small");
12747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                }
12757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                pIsTailored = true;
12767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            }
12777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            p = primaries.nextWeight();
12787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            assert(p != 0xffffffffL);
12797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            s = Collation.COMMON_WEIGHT16;
12807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            sIsTailored = false;
12817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
12827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        t = s == 0 ? 0 : Collation.COMMON_WEIGHT16;
12837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        tIsTailored = false;
12847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
12857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    q = 0;
12867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
12877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(isTailoredNode(node)) {
12887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    nodesArray[i] = Collation.makeCE(p, s, t, q);
12897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(DEBUG) {
12907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        System.out.printf("%016x\n", nodesArray[i]);
12917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
12927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
12937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
12947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
12957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
12967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
12977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
12987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Counts the tailored nodes of the given strength up to the next node
12997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * which is either stronger or has an explicit weight of this strength.
13007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
13017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int countTailoredNodes(long[] nodesArray, int i, int strength) {
13027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int count = 0;
13037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(;;) {
13047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(i == 0) { break; }
13057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long node = nodesArray[i];
13067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(strengthFromNode(node) < strength) { break; }
13077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(strengthFromNode(node) == strength) {
13087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(isTailoredNode(node)) {
13097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ++count;
13107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
13117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
13127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
13137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
13147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            i = nextIndexFromNode(node);
13157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
13167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return count;
13177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
13187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final class CEFinalizer implements CollationDataBuilder.CEModifier {
13207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CEFinalizer(long[] ces) {
13217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            finalCEs = ces;
13227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
13237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public long modifyCE32(int ce32) {
13247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assert(!Collation.isSpecialCE32(ce32));
13257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(CollationBuilder.isTempCE32(ce32)) {
13267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // retain case bits
13277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return finalCEs[CollationBuilder.indexFromTempCE32(ce32)] | ((ce32 & 0xc0) << 8);
13287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
13297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return Collation.NO_CE;
13307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
13317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
13327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public long modifyCE(long ce) {
13337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(CollationBuilder.isTempCE(ce)) {
13347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // retain case bits
13357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return finalCEs[CollationBuilder.indexFromTempCE(ce)] | (ce & 0xc000);
13367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
13377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return Collation.NO_CE;
13387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
13397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
13407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private long[] finalCEs;
13427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    };
13437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Replaces temporary CEs with the final CEs they point to. */
13457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void finalizeCEs() {
13467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CollationDataBuilder newBuilder = new CollationDataBuilder();
13477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        newBuilder.initForTailoring(baseData);
13487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CEFinalizer finalizer = new CEFinalizer(nodes.getBuffer());
13497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        newBuilder.copyFrom(dataBuilder, finalizer);
13507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        dataBuilder = newBuilder;
13517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
13527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
13547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Encodes "temporary CE" data into a CE that fits into the CE32 data structure,
13557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * with 2-byte primary, 1-byte secondary and 6-bit tertiary,
13567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * with valid CE byte values.
13577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
13587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The index must not exceed 20 bits (0xfffff).
13597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The strength must fit into 2 bits (Collator.PRIMARY..Collator.QUATERNARY).
13607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
13617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Temporary CEs are distinguished from real CEs by their use of
13627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * secondary weights 06..45 which are otherwise reserved for compressed sort keys.
13637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
13647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The case bits are unused and available.
13657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
13667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static long tempCEFromIndexAndStrength(int index, int strength) {
13677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return
13687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // CE byte offsets, to ensure valid CE bytes, and case bits 11
13697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            0x4040000006002000L +
13707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // index bits 19..13 -> primary byte 1 = CE bits 63..56 (byte values 40..BF)
13717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ((long)(index & 0xfe000) << 43) +
13727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // index bits 12..6 -> primary byte 2 = CE bits 55..48 (byte values 40..BF)
13737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ((long)(index & 0x1fc0) << 42) +
13747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // index bits 5..0 -> secondary byte 1 = CE bits 31..24 (byte values 06..45)
13757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ((index & 0x3f) << 24) +
13767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // strength bits 1..0 -> tertiary byte 1 = CE bits 13..8 (byte values 20..23)
13777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            (strength << 8);
13787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
13797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int indexFromTempCE(long tempCE) {
13807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        tempCE -= 0x4040000006002000L;
13817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return
13827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ((int)(tempCE >> 43) & 0xfe000) |
13837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ((int)(tempCE >> 42) & 0x1fc0) |
13847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ((int)(tempCE >> 24) & 0x3f);
13857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
13867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int strengthFromTempCE(long tempCE) {
13877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ((int)tempCE >> 8) & 3;
13887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
13897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static boolean isTempCE(long ce) {
13907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int sec = (int)ce >>> 24;
13917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return 6 <= sec && sec <= 0x45;
13927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
13937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
13947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int indexFromTempCE32(int tempCE32) {
13957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        tempCE32 -= 0x40400620;
13967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return
13977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ((tempCE32 >> 11) & 0xfe000) |
13987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ((tempCE32 >> 10) & 0x1fc0) |
13997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ((tempCE32 >> 8) & 0x3f);
14007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static boolean isTempCE32(int ce32) {
14027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return
14037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            (ce32 & 0xff) >= 2 &&  // not a long-primary/long-secondary CE32
14047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            6 <= ((ce32 >> 8) & 0xff) && ((ce32 >> 8) & 0xff) <= 0x45;
14057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int ceStrength(long ce) {
14087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return
14097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            isTempCE(ce) ? strengthFromTempCE(ce) :
14107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            (ce & 0xff00000000000000L) != 0 ? Collator.PRIMARY :
14117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ((int)ce & 0xff000000) != 0 ? Collator.SECONDARY :
14127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ce != 0 ? Collator.TERTIARY :
14137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Collator.IDENTICAL;
14147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** At most 1M nodes, limited by the 20 bits in node bit fields. */
14177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int MAX_INDEX = 0xfffff;
14187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1419f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     * Node bit 6 is set on a primary node if there are nodes
1420f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     * with secondary values below the common secondary weight (05).
14217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
14227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int HAS_BEFORE2 = 0x40;
14237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1424f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     * Node bit 5 is set on a primary or secondary node if there are nodes
1425f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     * with tertiary values below the common tertiary weight (05).
14267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
14277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int HAS_BEFORE3 = 0x20;
14287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
14297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Node bit 3 distinguishes a tailored node, which has no weight value,
14307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * from a node with an explicit (root or default) weight.
14317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
14327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final int IS_TAILORED = 8;
14337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static long nodeFromWeight32(long weight32) {
14357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return weight32 << 32;
14367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static long nodeFromWeight16(int weight16) {
14387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (long)weight16 << 48;
14397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static long nodeFromPreviousIndex(int previous) {
14417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (long)previous << 28;
14427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static long nodeFromNextIndex(int next) {
14447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return next << 8;
14457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static long nodeFromStrength(int strength) {
14477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return strength;
14487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static long weight32FromNode(long node) {
14517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return node >>> 32;
14527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int weight16FromNode(long node) {
14547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (int)(node >> 48) & 0xffff;
14557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int previousIndexFromNode(long node) {
14577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (int)(node >> 28) & MAX_INDEX;
14587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int nextIndexFromNode(long node) {
14607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ((int)node >> 8) & MAX_INDEX;
14617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static int strengthFromNode(long node) {
14637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (int)node & 3;
14647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static boolean nodeHasBefore2(long node) {
14677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (node & HAS_BEFORE2) != 0;
14687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static boolean nodeHasBefore3(long node) {
14707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (node & HAS_BEFORE3) != 0;
14717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static boolean nodeHasAnyBefore(long node) {
14737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (node & (HAS_BEFORE2 | HAS_BEFORE3)) != 0;
14747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static boolean isTailoredNode(long node) {
14767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (node & IS_TAILORED) != 0;
14777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static long changeNodePreviousIndex(long node, int previous) {
14807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (node & 0xffff00000fffffffL) | nodeFromPreviousIndex(previous);
14817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static long changeNodeNextIndex(long node, int next) {
14837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (node & 0xfffffffff00000ffL) | nodeFromNextIndex(next);
14847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
14857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private Normalizer2 nfd, fcd;
14877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private Normalizer2Impl nfcImpl;
14887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private CollationTailoring base;
14907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private CollationData baseData;
14917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private CollationRootElements rootElements;
14927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private long variableTop;
14937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private CollationDataBuilder dataBuilder;
14957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean fastLatinEnabled;
14967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private UnicodeSet optimizeSet = new UnicodeSet();
14977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
14987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private long[] ces = new long[Collation.MAX_EXPANSION_LENGTH];
14997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int cesLength;
15007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
15017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
15027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Indexes of nodes with root primary weights, sorted by primary.
15037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Compact form of a TreeMap from root primary to node index.
15047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
15057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This is a performance optimization for finding reset positions.
15067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Without this, we would have to search through the entire nodes list.
15077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * It also allows storing root primary weights in list head nodes,
15087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * without previous index, leaving room in root primary nodes for 32-bit primary weights.
15097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
15107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private UVector32 rootPrimaryIndexes;
15117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
15127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Data structure for assigning tailored weights and CEs.
15137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Doubly-linked lists of nodes in mostly collation order.
15147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Each list starts with a root primary node and ends with a nextIndex of 0.
15157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
15167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * When there are any nodes in the list, then there is always a root primary node at index 0.
15177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This allows some code not to have to check explicitly for nextIndex==0.
15187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
15197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Root primary nodes have 32-bit weights but do not have previous indexes.
15207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * All other nodes have at most 16-bit weights and do have previous indexes.
15217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
15227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Nodes with explicit weights store root collator weights,
15237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * or default weak weights (e.g., secondary 05) for stronger nodes.
15247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * "Tailored" nodes, with the IS_TAILORED bit set,
15257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * do not store explicit weights but rather
15267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * create a difference of a certain strength from the preceding node.
15277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
15287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * A root node is followed by either
15297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * - a root/default node of the same strength, or
15307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * - a root/default node of the next-weaker strength, or
15317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * - a tailored node of the same strength.
15327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
15337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * A node of a given strength normally implies "common" weights on weaker levels.
15347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
15357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * A node with HAS_BEFORE2 must be immediately followed by
1536f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     * a secondary node with an explicit below-common weight, then a secondary tailored node,
15377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * and later an explicit common-secondary node.
1538f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     * The below-common weight can be a root weight,
1539f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     * or it can be BEFORE_WEIGHT16 for tailoring before an implied common weight
1540f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     * or before the lowest root weight.
1541f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     * (&[before 2] resets to an explicit secondary node so that
15427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the following addRelation(secondary) tailors right after that.
15437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If we did not have this node and instead were to reset on the primary node,
15447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * then addRelation(secondary) would skip forward to the the COMMON_WEIGHT16 node.)
15457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
15467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If the flag is not set, then there are no explicit secondary nodes
15477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * with the common or lower weights.
15487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
15497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Same for HAS_BEFORE3 for tertiary nodes and weights.
15507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * A node must not have both flags set.
15517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
15527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Tailored CEs are initially represented in a CollationDataBuilder as temporary CEs
15537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * which point to stable indexes in this list,
15547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * and temporary CEs stored in a CollationDataBuilder only point to tailored nodes.
15557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
15567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * A temporary CE in the ces[] array may point to a non-tailored reset-before-position node,
15577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * until the next relation is added.
15587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
15597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * At the end, the tailored weights are allocated as necessary,
15607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * then the tailored nodes are replaced with final CEs,
15617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * and the CollationData is rewritten by replacing temporary CEs with final ones.
15627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
15637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * We cannot simply insert new nodes in the middle of the array
15647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * because that would invalidate the indexes stored in existing temporary CEs.
15657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * We need to use a linked graph with stable indexes to existing nodes.
15667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * A doubly-linked list seems easiest to maintain.
15677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
15687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Each node is stored as an long, with its fields stored as bit fields.
15697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
15707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Root primary node:
15717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * - primary weight: 32 bits 63..32
15727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * - reserved/unused/zero: 4 bits 31..28
15737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
15747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Weaker root nodes & tailored nodes:
15757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * - a weight: 16 bits 63..48
15767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   + a root or default weight for a non-tailored node
15777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   + unused/zero for a tailored node
15787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * - index to the previous node: 20 bits 47..28
15797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
15807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * All types of nodes:
15817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * - index to the next node: 20 bits 27..8
15827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *   + nextIndex=0 in last node per root-primary list
15837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * - reserved/unused/zero bits: bits 7, 4, 2
15847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * - HAS_BEFORE2: bit 6
15857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * - HAS_BEFORE3: bit 5
15867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * - IS_TAILORED: bit 3
15877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * - the difference strength (primary/secondary/tertiary/quaternary): 2 bits 1..0
15887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
15897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * We could allocate structs with pointers, but we would have to store them
15907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * in a pointer list so that they can be indexed from temporary CEs,
15917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * and they would require more memory allocations.
15927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
15937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private UVector64 nodes;
15947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
1595