// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
*******************************************************************************
* Copyright (C) 2010-2015, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* CollationData.java, ported from collationdata.h/.cpp
*
* C++ version created on: 2010oct27
* created by: Markus W. Scherer
*/
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.impl.coll;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Normalizer2Impl;
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Trie2_32;
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.lang.UScript;
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.Collator;
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.text.UnicodeSet;
21f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubertimport com.ibm.icu.util.ICUException;
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/**
 * Collation data container.
 * Immutable data created by a CollationDataBuilder, or loaded from a file,
 * or deserialized from API-provided binary data.
 *
 * Includes data for the collation base (root/default), aliased if this is not the base.
 */
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic final class CollationData {
    // Note: The ucadata.icu loader could discover the reserved ranges by setting an array
    // parallel with the ranges, and resetting ranges that are indexed.
    // The reordering builder code could clone the resulting template array.
    // Reorder code for the reserved range before Latin; the reordering code looks up
    // scriptsIndex[numScripts + (code - Collator.ReorderCodes.FIRST)] for it.
    static final int REORDER_RESERVED_BEFORE_LATIN = Collator.ReorderCodes.FIRST + 14;
    // Reorder code for the reserved range after Latin; looked up the same way.
    static final int REORDER_RESERVED_AFTER_LATIN = Collator.ReorderCodes.FIRST + 15;

    // Number of special reorder codes, counted from Collator.ReorderCodes.FIRST, that
    // getScriptIndex() and getGroupForPrimary() resolve via scriptsIndex[numScripts + i].
    static final int MAX_NUM_SPECIAL_REORDER_CODES = 8;
38f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    CollationData(Normalizer2Impl nfc) {
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        nfcImpl = nfc;
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getCE32(int c) {
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return trie.get(c);
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    int getCE32FromSupplementary(int c) {
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return trie.get(c);  // TODO: port UTRIE2_GET32_FROM_SUPP(trie, c) to Java?
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    boolean isDigit(int c) {
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return c < 0x660 ? c <= 0x39 && 0x30 <= c :
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Collation.hasCE32Tag(getCE32(c), Collation.DIGIT_TAG);
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean isUnsafeBackward(int c, boolean numeric) {
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return unsafeBackwardSet.contains(c) || (numeric && isDigit(c));
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean isCompressibleLeadByte(int b) {
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return compressibleBytes[b];
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public boolean isCompressiblePrimary(long p) {
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return isCompressibleLeadByte((int)p >>> 24);
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the CE32 from two contexts words.
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Access to the defaultCE32 for contraction and prefix matching.
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    int getCE32FromContexts(int index) {
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ((int)contexts.charAt(index) << 16) | contexts.charAt(index + 1);
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the CE32 for an indirect special CE32 (e.g., with DIGIT_TAG).
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Requires that ce32 is special.
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    int getIndirectCE32(int ce32) {
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(Collation.isSpecialCE32(ce32));
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int tag = Collation.tagFromCE32(ce32);
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(tag == Collation.DIGIT_TAG) {
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Fetch the non-numeric-collation CE32.
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ce32 = ce32s[Collation.indexFromCE32(ce32)];
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if(tag == Collation.LEAD_SURROGATE_TAG) {
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ce32 = Collation.UNASSIGNED_CE32;
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else if(tag == Collation.U0000_TAG) {
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Fetch the normal ce32 for U+0000.
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ce32 = ce32s[0];
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ce32;
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the CE32 for an indirect special CE32 (e.g., with DIGIT_TAG),
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * if ce32 is special.
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    int getFinalCE32(int ce32) {
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(Collation.isSpecialCE32(ce32)) {
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ce32 = getIndirectCE32(ce32);
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ce32;
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Computes a CE from c's ce32 which has the OFFSET_TAG.
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    long getCEFromOffsetCE32(int c, int ce32) {
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long dataCE = ces[Collation.indexFromCE32(ce32)];
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return Collation.makeCE(Collation.getThreeBytePrimaryForOffsetData(c, dataCE));
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the single CE that c maps to.
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Throws UnsupportedOperationException if c does not map to a single CE.
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    long getSingleCE(int c) {
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CollationData d;
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int ce32 = getCE32(c);
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(ce32 == Collation.FALLBACK_CE32) {
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            d = base;
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ce32 = base.getCE32(c);
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            d = this;
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while(Collation.isSpecialCE32(ce32)) {
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            switch(Collation.tagFromCE32(ce32)) {
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case Collation.LATIN_EXPANSION_TAG:
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case Collation.BUILDER_DATA_TAG:
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case Collation.PREFIX_TAG:
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case Collation.CONTRACTION_TAG:
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case Collation.HANGUL_TAG:
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case Collation.LEAD_SURROGATE_TAG:
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new UnsupportedOperationException(String.format(
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        "there is not exactly one collation element for U+%04X (CE32 0x%08x)",
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        c, ce32));
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case Collation.FALLBACK_TAG:
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case Collation.RESERVED_TAG_3:
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new AssertionError(String.format(
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        "unexpected CE32 tag for U+%04X (CE32 0x%08x)", c, ce32));
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case Collation.LONG_PRIMARY_TAG:
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return Collation.ceFromLongPrimaryCE32(ce32);
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case Collation.LONG_SECONDARY_TAG:
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return Collation.ceFromLongSecondaryCE32(ce32);
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case Collation.EXPANSION32_TAG:
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(Collation.lengthFromCE32(ce32) == 1) {
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ce32 = d.ce32s[Collation.indexFromCE32(ce32)];
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    break;
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    throw new UnsupportedOperationException(String.format(
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            "there is not exactly one collation element for U+%04X (CE32 0x%08x)",
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            c, ce32));
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case Collation.EXPANSION_TAG: {
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if(Collation.lengthFromCE32(ce32) == 1) {
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return d.ces[Collation.indexFromCE32(ce32)];
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    throw new UnsupportedOperationException(String.format(
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            "there is not exactly one collation element for U+%04X (CE32 0x%08x)",
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            c, ce32));
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case Collation.DIGIT_TAG:
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Fetch the non-numeric-collation CE32 and continue.
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ce32 = d.ce32s[Collation.indexFromCE32(ce32)];
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case Collation.U0000_TAG:
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                assert(c == 0);
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Fetch the normal ce32 for U+0000 and continue.
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ce32 = d.ce32s[0];
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case Collation.OFFSET_TAG:
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return d.getCEFromOffsetCE32(c, ce32);
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            case Collation.IMPLICIT_TAG:
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return Collation.unassignedCEFromCodePoint(c);
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return Collation.ceFromSimpleCE32(ce32);
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the FCD16 value for code point c. c must be >= 0.
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    int getFCD16(int c) {
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return nfcImpl.getFCD16(c);
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the first primary for the script's reordering group.
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the primary with only the first primary lead byte of the group
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *         (not necessarily an actual root collator primary weight),
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *         or 0 if the script is unknown
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    long getFirstPrimaryForGroup(int script) {
196f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        int index = getScriptIndex(script);
197f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        return index == 0 ? 0 : (long)scriptStarts[index] << 16;
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the last primary for the script's reordering group.
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the last primary of the group
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *         (not an actual root collator primary weight),
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *         or 0 if the script is unknown
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public long getLastPrimaryForGroup(int script) {
207f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        int index = getScriptIndex(script);
208f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        if(index == 0) {
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return 0;
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
211f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        long limit = scriptStarts[index + 1];
212f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        return (limit << 16) - 1;
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Finds the reordering group which contains the primary weight.
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return the first script of the group, or -1 if the weight is beyond the last group
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int getGroupForPrimary(long p) {
220f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        p >>= 16;
221f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        if(p < scriptStarts[1] || scriptStarts[scriptStarts.length - 1] <= p) {
222f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            return -1;
223f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        }
224f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        int index = 1;
225f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        while(p >= scriptStarts[index + 1]) { ++index; }
226f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        for(int i = 0; i < numScripts; ++i) {
227f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            if(scriptsIndex[i] == index) {
228f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                return i;
229f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            }
230f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        }
231f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        for(int i = 0; i < MAX_NUM_SPECIAL_REORDER_CODES; ++i) {
232f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            if(scriptsIndex[numScripts + i] == index) {
233f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                return Collator.ReorderCodes.FIRST + i;
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return -1;
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
239f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert    private int getScriptIndex(int script) {
240f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        if(script < 0) {
241f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            return 0;
242f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        } else if(script < numScripts) {
243f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            return scriptsIndex[script];
244f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        } else if(script < Collator.ReorderCodes.FIRST) {
245f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            return 0;
246f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        } else {
247f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            script -= Collator.ReorderCodes.FIRST;
248f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            if(script < MAX_NUM_SPECIAL_REORDER_CODES) {
249f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                return scriptsIndex[numScripts + script];
250f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            } else {
251f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                return 0;
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int[] getEquivalentScripts(int script) {
257f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        int index = getScriptIndex(script);
258f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        if(index == 0) { return EMPTY_INT_ARRAY; }
259f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        if(script >= Collator.ReorderCodes.FIRST) {
260f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            // Special groups have no aliases.
261f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            return new int[] { script };
262f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        }
263f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert
264f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        int length = 0;
265f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        for(int i = 0; i < numScripts; ++i) {
266f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            if(scriptsIndex[i] == index) {
267f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                ++length;
268f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            }
269f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        }
270f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        int[] dest = new int[length];
271f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        if(length == 1) {
272f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            dest[0] = script;
273f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            return dest;
274f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        }
275f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        length = 0;
276f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        for(int i = 0; i < numScripts; ++i) {
277f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            if(scriptsIndex[i] == index) {
278f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                dest[length++] = i;
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return dest;
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
285f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     * Writes the permutation of primary-weight ranges
286f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     * for the given reordering of scripts and groups.
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The caller checks for illegal arguments and
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * takes care of [DEFAULT] and memory allocation.
289f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     *
290f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     * <p>Each list element will be a (limit, offset) pair as described
291f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     * for the CollationSettings.reorderRanges.
292f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     * The list will be empty if no ranges are reordered.
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
294f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert    void makeReorderRanges(int[] reorder, UVector32 ranges) {
295f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        makeReorderRanges(reorder, false, ranges);
296f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert    }
297f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert
298f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert    private void makeReorderRanges(int[] reorder, boolean latinMustMove, UVector32 ranges) {
299f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        ranges.removeAllElements();
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int length = reorder.length;
301f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        if(length == 0 || (length == 1 && reorder[0] == UScript.UNKNOWN)) {
302f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            return;
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
305f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        // Maps each script-or-group range to a new lead byte.
306f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        short[] table = new short[scriptStarts.length - 1];  // C++: uint8_t[]
3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
308f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        {
309f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            // Set "don't care" values for reserved ranges.
310f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            int index = scriptsIndex[
311f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    numScripts + REORDER_RESERVED_BEFORE_LATIN - Collator.ReorderCodes.FIRST];
312f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            if(index != 0) {
313f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                table[index] = 0xff;
314f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            }
315f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            index = scriptsIndex[
316f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    numScripts + REORDER_RESERVED_AFTER_LATIN - Collator.ReorderCodes.FIRST];
317f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            if(index != 0) {
318f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                table[index] = 0xff;
319f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            }
3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
322f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        // Never reorder special low and high primary lead bytes.
323f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        assert(scriptStarts.length >= 2);
324f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        assert(scriptStarts[0] == 0);
325f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        int lowStart = scriptStarts[1];
326f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        assert(lowStart == ((Collation.MERGE_SEPARATOR_BYTE + 1) << 8));
327f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        int highLimit = scriptStarts[scriptStarts.length - 1];
328f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        assert(highLimit == (Collation.TRAIL_WEIGHT_BYTE << 8));
329f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert
3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Get the set of special reorder codes in the input list.
331f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        // This supports a fixed number of special reorder codes;
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // it works for data with codes beyond Collator.ReorderCodes.LIMIT.
3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int specials = 0;
3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(int i = 0; i < length; ++i) {
3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int reorderCode = reorder[i] - Collator.ReorderCodes.FIRST;
336f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            if(0 <= reorderCode && reorderCode < MAX_NUM_SPECIAL_REORDER_CODES) {
3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                specials |= 1 << reorderCode;
3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Start the reordering with the special low reorder codes that do not occur in the input.
342f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        for(int i = 0; i < MAX_NUM_SPECIAL_REORDER_CODES; ++i) {
343f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            int index = scriptsIndex[numScripts + i];
344f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            if(index != 0 && (specials & (1 << i)) == 0) {
345f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                lowStart = addLowScriptRange(table, index, lowStart);
3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
349f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        // Skip the reserved range before Latin if Latin is the first script,
350f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        // so that we do not move it unnecessarily.
351f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        int skippedReserved = 0;
352f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        if(specials == 0 && reorder[0] == UScript.LATIN && !latinMustMove) {
353f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            int index = scriptsIndex[UScript.LATIN];
354f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            assert(index != 0);
355f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            int start = scriptStarts[index];
356f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            assert(lowStart <= start);
357f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            skippedReserved = start - lowStart;
358f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            lowStart = start;
359f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        }
360f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert
361f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        // Reorder according to the input scripts, continuing from the bottom of the primary range.
362f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        boolean hasReorderToEnd = false;
3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for(int i = 0; i < length;) {
3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int script = reorder[i++];
3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(script == UScript.UNKNOWN) {
3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Put the remaining scripts at the top.
367f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                hasReorderToEnd = true;
3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                while(i < length) {
3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    script = reorder[--length];
3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(script == UScript.UNKNOWN) {  // Must occur at most once.
3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        throw new IllegalArgumentException(
3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                "setReorderCodes(): duplicate UScript.UNKNOWN");
3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if(script == Collator.ReorderCodes.DEFAULT) {
3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        throw new IllegalArgumentException(
3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                "setReorderCodes(): UScript.DEFAULT together with other scripts");
3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
378f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    int index = getScriptIndex(script);
379f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    if(index == 0) { continue; }
380f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    if(table[index] != 0) {  // Duplicate or equivalent script.
3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        throw new IllegalArgumentException(
3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                "setReorderCodes(): duplicate or equivalent script " +
3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                scriptCodeString(script));
3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
385f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    highLimit = addHighScriptRange(table, index, highLimit);
3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                break;
3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(script == Collator.ReorderCodes.DEFAULT) {
3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // The default code must be the only one in the list, and that is handled by the caller.
3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Otherwise it must not be used.
3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new IllegalArgumentException(
3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        "setReorderCodes(): UScript.DEFAULT together with other scripts");
3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
395f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            int index = getScriptIndex(script);
396f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            if(index == 0) { continue; }
397f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            if(table[index] != 0) {  // Duplicate or equivalent script.
3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new IllegalArgumentException(
3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        "setReorderCodes(): duplicate or equivalent script " +
4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        scriptCodeString(script));
4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
402f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            lowStart = addLowScriptRange(table, index, lowStart);
4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Put all remaining scripts into the middle.
406f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        for(int i = 1; i < scriptStarts.length - 1; ++i) {
407f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            int leadByte = table[i];
408f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            if(leadByte != 0) { continue; }
409f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            int start = scriptStarts[i];
410f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            if(!hasReorderToEnd && start > lowStart) {
411f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                // No need to move this script.
412f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                lowStart = start;
413f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            }
414f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            lowStart = addLowScriptRange(table, i, lowStart);
4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
416f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        if(lowStart > highLimit) {
417f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            if((lowStart - (skippedReserved & 0xff00)) <= highLimit) {
418f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                // Try not skipping the before-Latin reserved range.
419f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                makeReorderRanges(reorder, true, ranges);
420f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                return;
421f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            }
422f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            // We need more primary lead bytes than available, despite the reserved ranges.
423f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            throw new ICUException(
424f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    "setReorderCodes(): reordering too many partial-primary-lead-byte scripts");
425f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        }
426f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert
427f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        // Turn lead bytes into a list of (limit, offset) pairs.
428f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        // Encode each pair in one list element:
429f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        // Upper 16 bits = limit, lower 16 = signed lead byte offset.
430f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        int offset = 0;
431f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        for(int i = 1;; ++i) {
432f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            int nextOffset = offset;
433f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            while(i < scriptStarts.length - 1) {
434f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                int newLeadByte = table[i];
435f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                if(newLeadByte == 0xff) {
436f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    // "Don't care" lead byte for reserved range, continue with current offset.
437f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                } else {
438f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    nextOffset = newLeadByte - (scriptStarts[i] >> 8);
439f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                    if(nextOffset != offset) { break; }
440f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                }
441f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                ++i;
442f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            }
443f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            if(offset != 0 || i < scriptStarts.length - 1) {
444f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert                ranges.addElement(((int)scriptStarts[i] << 16) | (offset & 0xffff));
445f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            }
446f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            if(i == scriptStarts.length - 1) { break; }
447f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            offset = nextOffset;
448f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        }
449f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert    }
450f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert
451f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert    private int addLowScriptRange(short[] table, int index, int lowStart) {
452f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        int start = scriptStarts[index];
453f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        if((start & 0xff) < (lowStart & 0xff)) {
454f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            lowStart += 0x100;
455f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        }
456f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        table[index] = (short)(lowStart >> 8);
457f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        int limit = scriptStarts[index + 1];
458f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        lowStart = ((lowStart & 0xff00) + ((limit & 0xff00) - (start & 0xff00))) | (limit & 0xff);
459f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        return lowStart;
460f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert    }
461f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert
462f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert    private int addHighScriptRange(short[] table, int index, int highLimit) {
463f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        int limit = scriptStarts[index + 1];
464f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        if((limit & 0xff) > (highLimit & 0xff)) {
465f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert            highLimit -= 0x100;
466f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        }
467f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        int start = scriptStarts[index];
468f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        highLimit = ((highLimit & 0xff00) - ((limit & 0xff00) - (start & 0xff00))) | (start & 0xff);
469f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        table[index] = (short)(highLimit >> 8);
470f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert        return highLimit;
4717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static String scriptCodeString(int script) {
4747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Do not use the script name here: We do not want to depend on that data.
4757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (script < Collator.ReorderCodes.FIRST) ?
4767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Integer.toString(script) : "0x" + Integer.toHexString(script);
4777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
/** Shared zero-length {@code int} array. */
private static final int[] EMPTY_INT_ARRAY = new int[0];

/**
 * Required length of {@link #jamoCE32s}: 19 + 21 + 27 entries,
 * one per canonical Jamo L/V/T.
 *
 * @see #jamoCE32s
 */
static final int JAMO_CE32S_LENGTH = 19 + 21 + 27;

/** Main lookup trie. */
Trie2_32 trie;
/**
 * Array of CE32 values.
 * At index 0 there must be CE32(U+0000)
 * to support U+0000's special-tag for NUL-termination handling.
 */
int[] ce32s;
/** Array of CE values for expansions and OFFSET_TAG. */
long[] ces;
/** Array of prefix and contraction-suffix matching data. */
String contexts;
/** Base collation data, or null if this data itself is a base. */
public CollationData base;
/**
 * Simple array of JAMO_CE32S_LENGTH=19+21+27 CE32s, one per canonical Jamo L/V/T.
 * They are normally simple CE32s, rarely expansions.
 * For fast handling of HANGUL_TAG.
 */
int[] jamoCE32s = new int[JAMO_CE32S_LENGTH];
/**
 * Normalizer implementation associated with this data.
 * NOTE(review): presumably the NFC instance, judging by the field name; confirm against the code that sets it.
 */
public Normalizer2Impl nfcImpl;
/** The single-byte primary weight (xx000000) for numeric collation. */
long numericPrimary = 0x12000000;

/** 256 flags for which primary-weight lead bytes are compressible. */
public boolean[] compressibleBytes;
/**
 * Set of code points that are unsafe for starting string comparison after an identical prefix,
 * or in backwards CE iteration.
 */
UnicodeSet unsafeBackwardSet;

/**
 * Fast Latin table for common-Latin-text string comparisons.
 * Data structure see class CollationFastLatin.
 */
public char[] fastLatinTable;
/**
 * Header portion of the fastLatinTable.
 * In C++, these are one array, and the header is skipped for mapping characters.
 * In Java, two arrays work better.
 */
char[] fastLatinTableHeader;

/**
 * Data for scripts and reordering groups.
 * Uses include building a reordering permutation table and
 * providing script boundaries to AlphabeticIndex.
 *
 * <p>This field is the number of leading {@link #scriptsIndex} entries;
 * the entries for special reorder codes start at this offset.
 */
int numScripts;
/**
 * The length of scriptsIndex is numScripts+16.
 * It maps from a UScriptCode or a special reorder code to an entry in scriptStarts.
 * 16 special reorder codes (not all used) are mapped starting at numScripts.
 * Up to MAX_NUM_SPECIAL_REORDER_CODES are codes for special groups like space/punct/digit.
 * There are special codes at the end for reorder-reserved primary ranges.
 *
 * <p>Multiple scripts may share a range and index, for example Hira and Kana.
 */
char[] scriptsIndex;
/**
 * Start primary weight (top 16 bits only) for a group/script/reserved range
 * indexed by scriptsIndex.
 * The first range (separators and terminators) and the last range (trailing weights)
 * are not reorderable, and no scriptsIndex entry points to them.
 */
char[] scriptStarts;

/**
 * Collation elements in the root collator.
 * Used by the CollationRootElements class. The data structure is described there.
 * null in a tailoring.
 */
public long[] rootElements;
5587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
559