17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/*
27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*******************************************************************************
37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Copyright (C) 2013-2014, International Business Machines
47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Corporation and others.  All Rights Reserved.
57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*******************************************************************************
67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* TailoredSet.java, ported from collationsets.h/.cpp
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* C++ version created on: 2013feb09
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* created by: Markus W. Scherer
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*/
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.impl.coll;
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Iterator;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Normalizer2Impl.Hangul;
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Trie2;
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UnicodeSet;
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.CharsTrie;
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.CharsTrie.Entry;
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/**
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Finds the set of characters and strings that sort differently in the tailoring
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * from the base data.
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Every mapping in the tailoring needs to be compared to the base,
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * because some mappings are copied for optimization, and
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * all contractions for a character are copied if any contractions for that character
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * are added, modified or removed.
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * It might be simpler to re-parse the rule string, but:
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * - That would require duplicating some of the from-rules builder code.
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * - That would make the runtime code depend on the builder.
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * - That would only work if we have the rule string, and we allow users to
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *   omit the rule string from data files.
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic final class TailoredSet {
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private CollationData data;
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private CollationData baseData;
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private UnicodeSet tailored;
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private StringBuilder unreversedPrefix = new StringBuilder();
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private String suffix;
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public TailoredSet(UnicodeSet t) {
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        tailored = t;
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void forData(CollationData d) {
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        data = d;
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        baseData = d.base;
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert (baseData != null);
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // utrie2_enum(data->trie, NULL, enumTailoredRange, this);
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Iterator<Trie2.Range> trieIterator = data.trie.iterator();
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Trie2.Range range;
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while (trieIterator.hasNext() && !(range = trieIterator.next()).leadSurrogate) {
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            enumTailoredRange(range.startCodePoint, range.endCodePoint, range.value, this);
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void enumTailoredRange(int start, int end, int ce32, TailoredSet ts) {
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (ce32 == Collation.FALLBACK_CE32) {
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return; // fallback to base, not tailored
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ts.handleCE32(start, end, ce32);
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Java porting note: ICU4C returns U_SUCCESS(error) and it's not applicable to ICU4J.
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //  Also, ICU4C requires handleCE32() to be public because it is used by the callback
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    //  function (enumTailoredRange()). This is not necessary for Java implementation.
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void handleCE32(int start, int end, int ce32) {
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert (ce32 != Collation.FALLBACK_CE32);
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (Collation.isSpecialCE32(ce32)) {
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ce32 = data.getIndirectCE32(ce32);
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (ce32 == Collation.FALLBACK_CE32) {
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        do {
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int baseCE32 = baseData.getFinalCE32(baseData.getCE32(start));
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Do not just continue if ce32 == baseCE32 because
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // contractions and expansions in different data objects
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // normally differ even if they have the same data offsets.
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (Collation.isSelfContainedCE32(ce32) && Collation.isSelfContainedCE32(baseCE32)) {
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // fastpath
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (ce32 != baseCE32) {
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    tailored.add(start);
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                compare(start, ce32, baseCE32);
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } while (++start <= end);
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void compare(int c, int ce32, int baseCE32) {
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (Collation.isPrefixCE32(ce32)) {
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int dataIndex = Collation.indexFromCE32(ce32);
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ce32 = data.getFinalCE32(data.getCE32FromContexts(dataIndex));
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (Collation.isPrefixCE32(baseCE32)) {
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int baseIndex = Collation.indexFromCE32(baseCE32);
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                baseCE32 = baseData.getFinalCE32(baseData.getCE32FromContexts(baseIndex));
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                comparePrefixes(c, data.contexts, dataIndex + 2, baseData.contexts, baseIndex + 2);
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                addPrefixes(data, c, data.contexts, dataIndex + 2);
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if (Collation.isPrefixCE32(baseCE32)) {
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int baseIndex = Collation.indexFromCE32(baseCE32);
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            baseCE32 = baseData.getFinalCE32(baseData.getCE32FromContexts(baseIndex));
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            addPrefixes(baseData, c, baseData.contexts, baseIndex + 2);
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (Collation.isContractionCE32(ce32)) {
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int dataIndex = Collation.indexFromCE32(ce32);
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if ((ce32 & Collation.CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ce32 = Collation.NO_CE32;
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ce32 = data.getFinalCE32(data.getCE32FromContexts(dataIndex));
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (Collation.isContractionCE32(baseCE32)) {
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int baseIndex = Collation.indexFromCE32(baseCE32);
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if ((baseCE32 & Collation.CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    baseCE32 = Collation.NO_CE32;
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    baseCE32 = baseData.getFinalCE32(baseData.getCE32FromContexts(baseIndex));
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                compareContractions(c, data.contexts, dataIndex + 2, baseData.contexts, baseIndex + 2);
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                addContractions(c, data.contexts, dataIndex + 2);
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if (Collation.isContractionCE32(baseCE32)) {
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int baseIndex = Collation.indexFromCE32(baseCE32);
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            baseCE32 = baseData.getFinalCE32(baseData.getCE32FromContexts(baseIndex));
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            addContractions(c, baseData.contexts, baseIndex + 2);
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int tag;
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (Collation.isSpecialCE32(ce32)) {
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            tag = Collation.tagFromCE32(ce32);
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assert (tag != Collation.PREFIX_TAG);
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assert (tag != Collation.CONTRACTION_TAG);
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Currently, the tailoring data builder does not write offset tags.
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // They might be useful for saving space,
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // but they would complicate the builder,
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // and in tailorings we assume that performance of tailored characters is more important.
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assert (tag != Collation.OFFSET_TAG);
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            tag = -1;
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int baseTag;
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (Collation.isSpecialCE32(baseCE32)) {
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            baseTag = Collation.tagFromCE32(baseCE32);
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assert (baseTag != Collation.PREFIX_TAG);
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            assert (baseTag != Collation.CONTRACTION_TAG);
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            baseTag = -1;
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Non-contextual mappings, expansions, etc.
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (baseTag == Collation.OFFSET_TAG) {
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // We might be comparing a tailoring CE which is a copy of
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // a base offset-tag CE, via the [optimize [set]] syntax
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // or when a single-character mapping was copied for tailored contractions.
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Offset tags always result in long-primary CEs,
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // with common secondary/tertiary weights.
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (!Collation.isLongPrimaryCE32(ce32)) {
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                add(c);
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long dataCE = baseData.ces[Collation.indexFromCE32(baseCE32)];
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            long p = Collation.getThreeBytePrimaryForOffsetData(c, dataCE);
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (Collation.primaryFromLongPrimaryCE32(ce32) != p) {
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                add(c);
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (tag != baseTag) {
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            add(c);
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return;
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (tag == Collation.EXPANSION32_TAG) {
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int length = Collation.lengthFromCE32(ce32);
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int baseLength = Collation.lengthFromCE32(baseCE32);
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (length != baseLength) {
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                add(c);
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int idx0 = Collation.indexFromCE32(ce32);
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int idx1 = Collation.indexFromCE32(baseCE32);
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (int i = 0; i < length; ++i) {
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (data.ce32s[idx0 + i] != baseData.ce32s[idx1 + i]) {
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    add(c);
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if (tag == Collation.EXPANSION_TAG) {
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int length = Collation.lengthFromCE32(ce32);
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int baseLength = Collation.lengthFromCE32(baseCE32);
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (length != baseLength) {
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                add(c);
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return;
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int idx0 = Collation.indexFromCE32(ce32);
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int idx1 = Collation.indexFromCE32(baseCE32);
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (int i = 0; i < length; ++i) {
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (data.ces[idx0 + i] != baseData.ces[idx1 + i]) {
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    add(c);
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if (tag == Collation.HANGUL_TAG) {
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            StringBuilder jamos = new StringBuilder();
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int length = Hangul.decompose(c, jamos);
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (tailored.contains(jamos.charAt(0)) || tailored.contains(jamos.charAt(1))
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    || (length == 3 && tailored.contains(jamos.charAt(2)))) {
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                add(c);
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if (ce32 != baseCE32) {
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            add(c);
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void comparePrefixes(int c, CharSequence p, int pidx, CharSequence q, int qidx) {
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Parallel iteration over prefixes of both tables.
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharsTrie.Iterator prefixes = new CharsTrie(p, pidx).iterator();
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharsTrie.Iterator basePrefixes = new CharsTrie(q, qidx).iterator();
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String tp = null; // Tailoring prefix.
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String bp = null; // Base prefix.
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Use a string with a U+FFFF as the limit sentinel.
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // U+FFFF is untailorable and will not occur in prefixes.
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String none = "\uffff";
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Entry te = null, be = null;
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (;;) {
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (tp == null) {
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (prefixes.hasNext()) {
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    te = prefixes.next();
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    tp = te.chars.toString();
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    te = null;
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    tp = none;
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (bp == null) {
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (basePrefixes.hasNext()) {
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    be = basePrefixes.next();
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    bp = be.chars.toString();
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    be = null;
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    bp = none;
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (tp == none && bp == none) {
2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int cmp = tp.compareTo(bp);
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (cmp < 0) {
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // tp occurs in the tailoring but not in the base.
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                assert (te != null);
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                addPrefix(data, tp, c, te.value);
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                te = null;
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                tp = null;
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if (cmp > 0) {
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // bp occurs in the base but not in the tailoring.
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                assert (be != null);
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                addPrefix(baseData, bp, c, be.value);
2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                be = null;
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                bp = null;
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                setPrefix(tp);
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                assert (te != null && be != null);
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                compare(c, te.value, be.value);
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                resetPrefix();
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                te = be = null;
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                tp = bp = null;
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void compareContractions(int c, CharSequence p, int pidx, CharSequence q, int qidx) {
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Parallel iteration over suffixes of both tables.
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharsTrie.Iterator suffixes = new CharsTrie(p, pidx).iterator();
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharsTrie.Iterator baseSuffixes = new CharsTrie(q, qidx).iterator();
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String ts = null; // Tailoring suffix.
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String bs = null; // Base suffix.
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Use a string with two U+FFFF as the limit sentinel.
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // U+FFFF is untailorable and will not occur in contractions except maybe
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // as a single suffix character for a root-collator boundary contraction.
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        String none = "\uffff\uffff";
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Entry te = null, be = null;
2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (;;) {
2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (ts == null) {
2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (suffixes.hasNext()) {
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    te = suffixes.next();
3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ts = te.chars.toString();
3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    te = null;
3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ts = none;
3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (bs == null) {
3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (baseSuffixes.hasNext()) {
3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    be = baseSuffixes.next();
3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    bs = be.chars.toString();
3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    be = null;
3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    bs = none;
3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (ts == none && bs == none) {
3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int cmp = ts.compareTo(bs);
3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (cmp < 0) {
3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // ts occurs in the tailoring but not in the base.
3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                addSuffix(c, ts);
3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                te = null;
3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ts = null;
3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if (cmp > 0) {
3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // bs occurs in the base but not in the tailoring.
3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                addSuffix(c, bs);
3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                be = null;
3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                bs = null;
3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                suffix = ts;
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                compare(c, te.value, be.value);
3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                suffix = null;
3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                te = be = null;
3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ts = bs = null;
3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void addPrefixes(CollationData d, int c, CharSequence p, int pidx) {
3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharsTrie.Iterator prefixes = new CharsTrie(p, pidx).iterator();
3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while (prefixes.hasNext()) {
3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Entry e = prefixes.next();
3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            addPrefix(d, e.chars, c, e.value);
3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void addPrefix(CollationData d, CharSequence pfx, int c, int ce32) {
3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        setPrefix(pfx);
3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ce32 = d.getFinalCE32(ce32);
3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (Collation.isContractionCE32(ce32)) {
3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int idx = Collation.indexFromCE32(ce32);
3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            addContractions(c, d.contexts, idx + 2);
3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        tailored.add(new StringBuilder(unreversedPrefix.appendCodePoint(c)));
3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        resetPrefix();
3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void addContractions(int c, CharSequence p, int pidx) {
3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharsTrie.Iterator suffixes = new CharsTrie(p, pidx).iterator();
3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while (suffixes.hasNext()) {
3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Entry e = suffixes.next();
3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            addSuffix(c, e.chars);
3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void addSuffix(int c, CharSequence sfx) {
3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        tailored.add(new StringBuilder(unreversedPrefix).appendCodePoint(c).append(sfx));
3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void add(int c) {
3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (unreversedPrefix.length() == 0 && suffix == null) {
3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            tailored.add(c);
3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            StringBuilder s = new StringBuilder(unreversedPrefix);
3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            s.appendCodePoint(c);
3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (suffix != null) {
3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                s.append(suffix);
3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            tailored.add(s);
3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Prefixes are reversed in the data structure.
3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void setPrefix(CharSequence pfx) {
3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        unreversedPrefix.setLength(0);
3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        unreversedPrefix.append(pfx).reverse();
3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private void resetPrefix() {
3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        unreversedPrefix.setLength(0);
3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
395