12d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// © 2016 and later: Unicode, Inc. and others.
22d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License
37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/*
47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*******************************************************************************
57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Copyright (C) 2013-2014, International Business Machines
67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Corporation and others.  All Rights Reserved.
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*******************************************************************************
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* TailoredSet.java, ported from collationsets.h/.cpp
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* C++ version created on: 2013feb09
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* created by: Markus W. Scherer
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*/
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.impl.coll;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Iterator;
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Normalizer2Impl.Hangul;
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Trie2;
202d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubertimport com.ibm.icu.impl.Utility;
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UnicodeSet;
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.CharsTrie;
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.CharsTrie.Entry;
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/**
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Finds the set of characters and strings that sort differently in the tailoring
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * from the base data.
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Every mapping in the tailoring needs to be compared to the base,
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * because some mappings are copied for optimization, and
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * all contractions for a character are copied if any contractions for that character
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * are added, modified or removed.
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * It might be simpler to re-parse the rule string, but:
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * - That would require duplicating some of the from-rules builder code.
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * - That would make the runtime code depend on the builder.
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * - That would only work if we have the rule string, and we allow users to
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *   omit the rule string from data files.
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic final class TailoredSet {
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private CollationData data;
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private CollationData baseData;
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private UnicodeSet tailored;
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private StringBuilder unreversedPrefix = new StringBuilder();
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private String suffix;
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public TailoredSet(UnicodeSet t) {
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        tailored = t;
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void forData(CollationData d) {
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        data = d;
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        baseData = d.base;
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert (baseData != null);
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // utrie2_enum(data->trie, NULL, enumTailoredRange, this);
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Iterator<Trie2.Range> trieIterator = data.trie.iterator();
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Trie2.Range range;
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while (trieIterator.hasNext() && !(range = trieIterator.next()).leadSurrogate) {
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            enumTailoredRange(range.startCodePoint, range.endCodePoint, range.value, this);
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void enumTailoredRange(int start, int end, int ce32, TailoredSet ts) {
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (ce32 == Collation.FALLBACK_CE32) {
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return; // fallback to base, not tailored
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ts.handleCE32(start, end, ce32);
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Java porting note: ICU4C returns U_SUCCESS(error) and it's not applicable to ICU4J.
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //  Also, ICU4C requires handleCE32() to be public because it is used by the callback
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //  function (enumTailoredRange()). This is not necessary for Java implementation.
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void handleCE32(int start, int end, int ce32) {
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert (ce32 != Collation.FALLBACK_CE32);
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (Collation.isSpecialCE32(ce32)) {
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ce32 = data.getIndirectCE32(ce32);
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (ce32 == Collation.FALLBACK_CE32) {
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        do {
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int baseCE32 = baseData.getFinalCE32(baseData.getCE32(start));
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Do not just continue if ce32 == baseCE32 because
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // contractions and expansions in different data objects
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // normally differ even if they have the same data offsets.
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (Collation.isSelfContainedCE32(ce32) && Collation.isSelfContainedCE32(baseCE32)) {
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // fastpath
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (ce32 != baseCE32) {
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    tailored.add(start);
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                compare(start, ce32, baseCE32);
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } while (++start <= end);
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void compare(int c, int ce32, int baseCE32) {
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (Collation.isPrefixCE32(ce32)) {
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int dataIndex = Collation.indexFromCE32(ce32);
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ce32 = data.getFinalCE32(data.getCE32FromContexts(dataIndex));
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (Collation.isPrefixCE32(baseCE32)) {
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int baseIndex = Collation.indexFromCE32(baseCE32);
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                baseCE32 = baseData.getFinalCE32(baseData.getCE32FromContexts(baseIndex));
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                comparePrefixes(c, data.contexts, dataIndex + 2, baseData.contexts, baseIndex + 2);
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                addPrefixes(data, c, data.contexts, dataIndex + 2);
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if (Collation.isPrefixCE32(baseCE32)) {
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int baseIndex = Collation.indexFromCE32(baseCE32);
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            baseCE32 = baseData.getFinalCE32(baseData.getCE32FromContexts(baseIndex));
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            addPrefixes(baseData, c, baseData.contexts, baseIndex + 2);
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (Collation.isContractionCE32(ce32)) {
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int dataIndex = Collation.indexFromCE32(ce32);
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if ((ce32 & Collation.CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ce32 = Collation.NO_CE32;
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ce32 = data.getFinalCE32(data.getCE32FromContexts(dataIndex));
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (Collation.isContractionCE32(baseCE32)) {
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int baseIndex = Collation.indexFromCE32(baseCE32);
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if ((baseCE32 & Collation.CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    baseCE32 = Collation.NO_CE32;
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    baseCE32 = baseData.getFinalCE32(baseData.getCE32FromContexts(baseIndex));
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                compareContractions(c, data.contexts, dataIndex + 2, baseData.contexts, baseIndex + 2);
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                addContractions(c, data.contexts, dataIndex + 2);
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if (Collation.isContractionCE32(baseCE32)) {
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int baseIndex = Collation.indexFromCE32(baseCE32);
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            baseCE32 = baseData.getFinalCE32(baseData.getCE32FromContexts(baseIndex));
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            addContractions(c, baseData.contexts, baseIndex + 2);
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int tag;
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (Collation.isSpecialCE32(ce32)) {
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            tag = Collation.tagFromCE32(ce32);
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assert (tag != Collation.PREFIX_TAG);
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assert (tag != Collation.CONTRACTION_TAG);
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Currently, the tailoring data builder does not write offset tags.
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // They might be useful for saving space,
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // but they would complicate the builder,
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // and in tailorings we assume that performance of tailored characters is more important.
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assert (tag != Collation.OFFSET_TAG);
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            tag = -1;
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int baseTag;
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (Collation.isSpecialCE32(baseCE32)) {
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            baseTag = Collation.tagFromCE32(baseCE32);
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assert (baseTag != Collation.PREFIX_TAG);
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assert (baseTag != Collation.CONTRACTION_TAG);
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            baseTag = -1;
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Non-contextual mappings, expansions, etc.
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (baseTag == Collation.OFFSET_TAG) {
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // We might be comparing a tailoring CE which is a copy of
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // a base offset-tag CE, via the [optimize [set]] syntax
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // or when a single-character mapping was copied for tailored contractions.
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Offset tags always result in long-primary CEs,
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // with common secondary/tertiary weights.
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (!Collation.isLongPrimaryCE32(ce32)) {
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                add(c);
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long dataCE = baseData.ces[Collation.indexFromCE32(baseCE32)];
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long p = Collation.getThreeBytePrimaryForOffsetData(c, dataCE);
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (Collation.primaryFromLongPrimaryCE32(ce32) != p) {
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                add(c);
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (tag != baseTag) {
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            add(c);
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return;
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (tag == Collation.EXPANSION32_TAG) {
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int length = Collation.lengthFromCE32(ce32);
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int baseLength = Collation.lengthFromCE32(baseCE32);
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (length != baseLength) {
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                add(c);
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int idx0 = Collation.indexFromCE32(ce32);
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int idx1 = Collation.indexFromCE32(baseCE32);
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (int i = 0; i < length; ++i) {
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (data.ce32s[idx0 + i] != baseData.ce32s[idx1 + i]) {
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    add(c);
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if (tag == Collation.EXPANSION_TAG) {
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int length = Collation.lengthFromCE32(ce32);
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int baseLength = Collation.lengthFromCE32(baseCE32);
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (length != baseLength) {
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                add(c);
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int idx0 = Collation.indexFromCE32(ce32);
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int idx1 = Collation.indexFromCE32(baseCE32);
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (int i = 0; i < length; ++i) {
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (data.ces[idx0 + i] != baseData.ces[idx1 + i]) {
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    add(c);
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if (tag == Collation.HANGUL_TAG) {
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            StringBuilder jamos = new StringBuilder();
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int length = Hangul.decompose(c, jamos);
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (tailored.contains(jamos.charAt(0)) || tailored.contains(jamos.charAt(1))
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    || (length == 3 && tailored.contains(jamos.charAt(2)))) {
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                add(c);
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if (ce32 != baseCE32) {
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            add(c);
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void comparePrefixes(int c, CharSequence p, int pidx, CharSequence q, int qidx) {
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Parallel iteration over prefixes of both tables.
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharsTrie.Iterator prefixes = new CharsTrie(p, pidx).iterator();
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharsTrie.Iterator basePrefixes = new CharsTrie(q, qidx).iterator();
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String tp = null; // Tailoring prefix.
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String bp = null; // Base prefix.
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Use a string with a U+FFFF as the limit sentinel.
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // U+FFFF is untailorable and will not occur in prefixes.
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String none = "\uffff";
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Entry te = null, be = null;
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (;;) {
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (tp == null) {
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (prefixes.hasNext()) {
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    te = prefixes.next();
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    tp = te.chars.toString();
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    te = null;
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    tp = none;
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (bp == null) {
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (basePrefixes.hasNext()) {
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    be = basePrefixes.next();
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    bp = be.chars.toString();
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    be = null;
2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    bp = none;
2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2622d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            if (Utility.sameObjects(tp, none) && Utility.sameObjects(bp, none)) {
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int cmp = tp.compareTo(bp);
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (cmp < 0) {
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // tp occurs in the tailoring but not in the base.
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                assert (te != null);
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                addPrefix(data, tp, c, te.value);
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                te = null;
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                tp = null;
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if (cmp > 0) {
2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // bp occurs in the base but not in the tailoring.
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                assert (be != null);
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                addPrefix(baseData, bp, c, be.value);
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                be = null;
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                bp = null;
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                setPrefix(tp);
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                assert (te != null && be != null);
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                compare(c, te.value, be.value);
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                resetPrefix();
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                te = be = null;
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                tp = bp = null;
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void compareContractions(int c, CharSequence p, int pidx, CharSequence q, int qidx) {
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Parallel iteration over suffixes of both tables.
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharsTrie.Iterator suffixes = new CharsTrie(p, pidx).iterator();
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharsTrie.Iterator baseSuffixes = new CharsTrie(q, qidx).iterator();
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String ts = null; // Tailoring suffix.
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String bs = null; // Base suffix.
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Use a string with two U+FFFF as the limit sentinel.
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // U+FFFF is untailorable and will not occur in contractions except maybe
2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // as a single suffix character for a root-collator boundary contraction.
2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String none = "\uffff\uffff";
2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Entry te = null, be = null;
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (;;) {
3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (ts == null) {
3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (suffixes.hasNext()) {
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    te = suffixes.next();
3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ts = te.chars.toString();
3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    te = null;
3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ts = none;
3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (bs == null) {
3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (baseSuffixes.hasNext()) {
3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    be = baseSuffixes.next();
3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    bs = be.chars.toString();
3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    be = null;
3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    bs = none;
3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3192d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert            if (Utility.sameObjects(ts, none) && Utility.sameObjects(bs, none)) {
3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int cmp = ts.compareTo(bs);
3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (cmp < 0) {
3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // ts occurs in the tailoring but not in the base.
3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                addSuffix(c, ts);
3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                te = null;
3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ts = null;
3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if (cmp > 0) {
3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // bs occurs in the base but not in the tailoring.
3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                addSuffix(c, bs);
3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                be = null;
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                bs = null;
3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                suffix = ts;
3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                compare(c, te.value, be.value);
3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                suffix = null;
3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                te = be = null;
3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ts = bs = null;
3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void addPrefixes(CollationData d, int c, CharSequence p, int pidx) {
3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharsTrie.Iterator prefixes = new CharsTrie(p, pidx).iterator();
3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while (prefixes.hasNext()) {
3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Entry e = prefixes.next();
3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            addPrefix(d, e.chars, c, e.value);
3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void addPrefix(CollationData d, CharSequence pfx, int c, int ce32) {
3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        setPrefix(pfx);
3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ce32 = d.getFinalCE32(ce32);
3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (Collation.isContractionCE32(ce32)) {
3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int idx = Collation.indexFromCE32(ce32);
3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            addContractions(c, d.contexts, idx + 2);
3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        tailored.add(new StringBuilder(unreversedPrefix.appendCodePoint(c)));
3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        resetPrefix();
3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void addContractions(int c, CharSequence p, int pidx) {
3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharsTrie.Iterator suffixes = new CharsTrie(p, pidx).iterator();
3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while (suffixes.hasNext()) {
3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Entry e = suffixes.next();
3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            addSuffix(c, e.chars);
3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void addSuffix(int c, CharSequence sfx) {
3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        tailored.add(new StringBuilder(unreversedPrefix).appendCodePoint(c).append(sfx));
3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void add(int c) {
3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (unreversedPrefix.length() == 0 && suffix == null) {
3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            tailored.add(c);
3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            StringBuilder s = new StringBuilder(unreversedPrefix);
3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            s.appendCodePoint(c);
3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (suffix != null) {
3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                s.append(suffix);
3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            tailored.add(s);
3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Prefixes are reversed in the data structure.
3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void setPrefix(CharSequence pfx) {
3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        unreversedPrefix.setLength(0);
3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        unreversedPrefix.append(pfx).reverse();
3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void resetPrefix() {
3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        unreversedPrefix.setLength(0);
3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
398