12d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// © 2016 and later: Unicode, Inc. and others.
22d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License
37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/*
47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*******************************************************************************
5f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert* Copyright (C) 2010-2015, International Business Machines
67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Corporation and others.  All Rights Reserved.
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*******************************************************************************
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Collation.java, ported from collation.h/.cpp
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* C++ version created on: 2010oct27
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* created by: Markus W. Scherer
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*/
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.impl.coll;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/**
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Collation v2 basic definitions and static helper functions.
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Data structures except for expansion tables store 32-bit CEs which are
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * either specials (see tags below) or are compact forms of 64-bit CEs.
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic final class Collation {
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** UChar32 U_SENTINEL.
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * TODO: Create a common, public constant?
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int SENTINEL_CP = -1;
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // ICU4C compare() API returns enum UCollationResult values (with UCOL_ prefix).
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // ICU4J just returns int. We use these constants for ease of porting.
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int LESS = -1;
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int EQUAL = 0;
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int GREATER = 1;
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Special sort key bytes for all levels.
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int TERMINATOR_BYTE = 0;
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int LEVEL_SEPARATOR_BYTE = 1;
37f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert
38f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert    /** The secondary/tertiary lower limit for tailoring before any root elements. */
39f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert    static final int BEFORE_WEIGHT16 = 0x100;
40f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Merge-sort-key separator.
43f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     * Same as the unique primary and identical-level weights of U+FFFE.
44f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     * Must not be used as primary compression low terminator.
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Otherwise usable.
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int MERGE_SEPARATOR_BYTE = 2;
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final long MERGE_SEPARATOR_PRIMARY = 0x02000000;  // U+FFFE
49f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert    static final int MERGE_SEPARATOR_CE32 = 0x02000505;  // U+FFFE
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Primary compression low terminator, must be greater than MERGE_SEPARATOR_BYTE.
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Reserved value in primary second byte if the lead byte is compressible.
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Otherwise usable in all CE weight bytes.
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int PRIMARY_COMPRESSION_LOW_BYTE = 3;
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Primary compression high terminator.
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Reserved value in primary second byte if the lead byte is compressible.
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Otherwise usable in all CE weight bytes.
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int PRIMARY_COMPRESSION_HIGH_BYTE = 0xff;
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Default secondary/tertiary weight lead byte. */
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int COMMON_BYTE = 5;
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int COMMON_WEIGHT16 = 0x0500;
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Middle 16 bits of a CE with a common secondary weight. */
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int COMMON_SECONDARY_CE = 0x05000000;
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Lower 16 bits of a CE with a common tertiary weight. */
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int COMMON_TERTIARY_CE = 0x0500;
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Lower 32 bits of a CE with common secondary and tertiary weights. */
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int COMMON_SEC_AND_TER_CE = 0x05000500;
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int SECONDARY_MASK = 0xffff0000;
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int CASE_MASK = 0xc000;
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int SECONDARY_AND_CASE_MASK = SECONDARY_MASK | CASE_MASK;
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Only the 2*6 bits for the pure tertiary weight. */
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int ONLY_TERTIARY_MASK = 0x3f3f;
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Only the secondary & tertiary bits; no case, no quaternary. */
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int ONLY_SEC_TER_MASK = SECONDARY_MASK | ONLY_TERTIARY_MASK;
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Case bits and tertiary bits. */
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int CASE_AND_TERTIARY_MASK = CASE_MASK | ONLY_TERTIARY_MASK;
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int QUATERNARY_MASK = 0xc0;
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Case bits and quaternary bits. */
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int CASE_AND_QUATERNARY_MASK = CASE_MASK | QUATERNARY_MASK;
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int UNASSIGNED_IMPLICIT_BYTE = 0xfe;  // compressible
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * First unassigned: AlphabeticIndex overflow boundary.
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * We want a 3-byte primary so that it fits into the root elements table.
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This 3-byte primary will not collide with
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * any unassigned-implicit 4-byte primaries because
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the first few hundred Unicode code points all have real mappings.
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final long FIRST_UNASSIGNED_PRIMARY = 0xfe040200L;
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int TRAIL_WEIGHT_BYTE = 0xff;  // not compressible
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final long FIRST_TRAILING_PRIMARY = 0xff020200L;  // [first trailing]
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final long MAX_PRIMARY = 0xffff0000L;  // U+FFFF
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int MAX_REGULAR_CE32 = 0xffff0505;  // U+FFFF
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // CE32 value for U+FFFD as well as illegal UTF-8 byte sequences (which behave like U+FFFD).
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // We use the third-highest primary weight for U+FFFD (as in UCA 6.3+).
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final long FFFD_PRIMARY = MAX_PRIMARY - 0x20000;
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int FFFD_CE32 = MAX_REGULAR_CE32 - 0x20000;
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * A CE32 is special if its low byte is this or greater.
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Impossible case bits 11 mark special CE32s.
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This value itself is used to indicate a fallback to the base collator.
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int SPECIAL_CE32_LOW_BYTE = 0xc0;
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int FALLBACK_CE32 = SPECIAL_CE32_LOW_BYTE;
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Low byte of a long-primary special CE32.
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int LONG_PRIMARY_CE32_LOW_BYTE = 0xc1;  // SPECIAL_CE32_LOW_BYTE | LONG_PRIMARY_TAG
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int UNASSIGNED_CE32 = 0xffffffff;  // Compute an unassigned-implicit CE.
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int NO_CE32 = 1;
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** No CE: End of input. Only used in runtime code, not stored in data. */
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final long NO_CE_PRIMARY = 1;  // not a left-adjusted weight
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int NO_CE_WEIGHT16 = 0x0100;  // weight of LEVEL_SEPARATOR_BYTE
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final long NO_CE = 0x101000100L;  // NO_CE_PRIMARY, NO_CE_WEIGHT16, NO_CE_WEIGHT16
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Sort key levels. */
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Unspecified level. */
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int NO_LEVEL = 0;
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int PRIMARY_LEVEL = 1;
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int SECONDARY_LEVEL = 2;
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int CASE_LEVEL = 3;
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int TERTIARY_LEVEL = 4;
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int QUATERNARY_LEVEL = 5;
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int IDENTICAL_LEVEL = 6;
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Beyond sort key bytes. */
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int ZERO_LEVEL = 7;
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Sort key level flags: xx_FLAG = 1 << xx_LEVEL.
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * In Java, use enum Level with flag() getters, or use EnumSet rather than hand-made bit sets.
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int NO_LEVEL_FLAG = 1;
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int PRIMARY_LEVEL_FLAG = 2;
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int SECONDARY_LEVEL_FLAG = 4;
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int CASE_LEVEL_FLAG = 8;
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int TERTIARY_LEVEL_FLAG = 0x10;
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int QUATERNARY_LEVEL_FLAG = 0x20;
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int IDENTICAL_LEVEL_FLAG = 0x40;
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int ZERO_LEVEL_FLAG = 0x80;
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Special-CE32 tags, from bits 3..0 of a special 32-bit CE.
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..8 are available for tag-specific data.
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits  5..4: Reserved. May be used in the future to indicate lccc!=0 and tccc!=0.
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Fall back to the base collator.
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This is the tag value in SPECIAL_CE32_LOW_BYTE and FALLBACK_CE32.
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..8: Unused, 0.
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int FALLBACK_TAG = 0;
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Long-primary CE with COMMON_SEC_AND_TER_CE.
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..8: Three-byte primary.
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int LONG_PRIMARY_TAG = 1;
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Long-secondary CE with zero primary.
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..16: Secondary weight.
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 15.. 8: Tertiary weight.
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int LONG_SECONDARY_TAG = 2;
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Unused.
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * May be used in the future for single-byte secondary CEs (SHORT_SECONDARY_TAG),
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * storing the secondary in bits 31..24, the ccc in bits 23..16,
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * and the tertiary in bits 15..8.
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int RESERVED_TAG_3 = 3;
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Latin mini expansions of two simple CEs [pp, 05, tt] [00, ss, 05].
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..24: Single-byte primary weight pp of the first CE.
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 23..16: Tertiary weight tt of the first CE.
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 15.. 8: Secondary weight ss of the second CE.
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int LATIN_EXPANSION_TAG = 4;
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Points to one or more simple/long-primary/long-secondary 32-bit CE32s.
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..13: Index into int table.
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 12.. 8: Length=1..31.
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int EXPANSION32_TAG = 5;
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Points to one or more 64-bit CEs.
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..13: Index into CE table.
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 12.. 8: Length=1..31.
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int EXPANSION_TAG = 6;
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Builder data, used only in the CollationDataBuilder, not in runtime data.
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If bit 8 is 0: Builder context, points to a list of context-sensitive mappings.
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..13: Index to the builder's list of ConditionalCE32 for this character.
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 12.. 9: Unused, 0.
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If bit 8 is 1 (IS_BUILDER_JAMO_CE32): Builder-only jamoCE32 value.
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The builder fetches the Jamo CE32 from the trie.
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..13: Jamo code point.
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 12.. 9: Unused, 0.
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int BUILDER_DATA_TAG = 7;
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Points to prefix trie.
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..13: Index into prefix/contraction data.
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 12.. 8: Unused, 0.
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int PREFIX_TAG = 8;
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Points to contraction data.
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..13: Index into prefix/contraction data.
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 12..11: Unused, 0.
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bit      10: CONTRACT_TRAILING_CCC flag.
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bit       9: CONTRACT_NEXT_CCC flag.
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bit       8: CONTRACT_SINGLE_CP_NO_MATCH flag.
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int CONTRACTION_TAG = 9;
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Decimal digit.
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..13: Index into int table for non-numeric-collation CE32.
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bit      12: Unused, 0.
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 11.. 8: Digit value 0..9.
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int DIGIT_TAG = 10;
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Tag for U+0000, for moving the NUL-termination handling
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * from the regular fastpath into specials-handling code.
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..8: Unused, 0.
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int U0000_TAG = 11;
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Tag for a Hangul syllable.
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..9: Unused, 0.
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bit      8: HANGUL_NO_SPECIAL_JAMO flag.
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int HANGUL_TAG = 12;
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Tag for a lead surrogate code unit.
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Optional optimization for UTF-16 string processing.
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..10: Unused, 0.
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *       9.. 8: =0: All associated supplementary code points are unassigned-implict.
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *              =1: All associated supplementary code points fall back to the base data.
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *              else: (Normally 2) Look up the data for the supplementary code point.
2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int LEAD_SURROGATE_TAG = 13;
2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Tag for CEs with primary weights in code point order.
2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..13: Index into CE table, for one data "CE".
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 12.. 8: Unused, 0.
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This data "CE" has the following bit fields:
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 63..32: Three-byte primary pppppp00.
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *      31.. 8: Start/base code point of the in-order range.
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *           7: Flag isCompressible primary.
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *       6.. 0: Per-code point primary-weight increment.
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int OFFSET_TAG = 14;
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Implicit CE tag. Compute an unassigned-implicit CE.
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * All bits are set (UNASSIGNED_CE32=0xffffffff).
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int IMPLICIT_TAG = 15;
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static boolean isAssignedCE32(int ce32) {
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ce32 != FALLBACK_CE32 && ce32 != UNASSIGNED_CE32;
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * We limit the number of CEs in an expansion
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * so that we can use a small number of length bits in the data structure,
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * and so that an implementation can copy CEs at runtime without growing a destination buffer.
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int MAX_EXPANSION_LENGTH = 31;
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int MAX_INDEX = 0x7ffff;
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Set if there is no match for the single (no-suffix) character itself.
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This is only possible if there is a prefix.
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * In this case, discontiguous contraction matching cannot add combining marks
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * starting from an empty suffix.
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The default CE32 is used anyway if there is no suffix match.
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int CONTRACT_SINGLE_CP_NO_MATCH = 0x100;
2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Set if the first character of every contraction suffix has lccc!=0. */
2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int CONTRACT_NEXT_CCC = 0x200;
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Set if any contraction suffix ends with lccc!=0. */
3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int CONTRACT_TRAILING_CCC = 0x400;
3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** For HANGUL_TAG: None of its Jamo CE32s isSpecialCE32(). */
3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int HANGUL_NO_SPECIAL_JAMO = 0x100;
3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int LEAD_ALL_UNASSIGNED = 0;
3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int LEAD_ALL_FALLBACK = 0x100;
3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int LEAD_MIXED = 0x200;
3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int LEAD_TYPE_MASK = 0x300;
3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static int makeLongPrimaryCE32(long p) { return (int)(p | LONG_PRIMARY_CE32_LOW_BYTE); }
3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Turns the long-primary CE32 into a primary weight pppppp00. */
3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long primaryFromLongPrimaryCE32(int ce32) {
3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (long)ce32 & 0xffffff00L;
3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long ceFromLongPrimaryCE32(int ce32) {
3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ((long)(ce32 & 0xffffff00) << 32) | COMMON_SEC_AND_TER_CE;
3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static int makeLongSecondaryCE32(int lower32) {
3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return lower32 | SPECIAL_CE32_LOW_BYTE | LONG_SECONDARY_TAG;
3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long ceFromLongSecondaryCE32(int ce32) {
3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (long)ce32 & 0xffffff00L;
3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Makes a special CE32 with tag, index and length. */
3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static int makeCE32FromTagIndexAndLength(int tag, int index, int length) {
3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (index << 13) | (length << 8) | SPECIAL_CE32_LOW_BYTE | tag;
3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Makes a special CE32 with only tag and index. */
3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static int makeCE32FromTagAndIndex(int tag, int index) {
3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (index << 13) | SPECIAL_CE32_LOW_BYTE | tag;
3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static boolean isSpecialCE32(int ce32) {
3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (ce32 & 0xff) >= SPECIAL_CE32_LOW_BYTE;
3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static int tagFromCE32(int ce32) {
3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ce32 & 0xf;
3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static boolean hasCE32Tag(int ce32, int tag) {
3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return isSpecialCE32(ce32) && tagFromCE32(ce32) == tag;
3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static boolean isLongPrimaryCE32(int ce32) {
3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return hasCE32Tag(ce32, LONG_PRIMARY_TAG);
3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static boolean isSimpleOrLongCE32(int ce32) {
3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return !isSpecialCE32(ce32) ||
3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                tagFromCE32(ce32) == LONG_PRIMARY_TAG ||
3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                tagFromCE32(ce32) == LONG_SECONDARY_TAG;
3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return true if the ce32 yields one or more CEs without further data lookups
3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static boolean isSelfContainedCE32(int ce32) {
3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return !isSpecialCE32(ce32) ||
3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                tagFromCE32(ce32) == LONG_PRIMARY_TAG ||
3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                tagFromCE32(ce32) == LONG_SECONDARY_TAG ||
3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                tagFromCE32(ce32) == LATIN_EXPANSION_TAG;
3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static boolean isPrefixCE32(int ce32) {
3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return hasCE32Tag(ce32, PREFIX_TAG);
3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static boolean isContractionCE32(int ce32) {
3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return hasCE32Tag(ce32, CONTRACTION_TAG);
3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static boolean ce32HasContext(int ce32) {
3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return isSpecialCE32(ce32) &&
3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                (tagFromCE32(ce32) == PREFIX_TAG ||
3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                tagFromCE32(ce32) == CONTRACTION_TAG);
3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Get the first of the two Latin-expansion CEs encoded in ce32.
3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see LATIN_EXPANSION_TAG
3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long latinCE0FromCE32(int ce32) {
3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ((long)(ce32 & 0xff000000) << 32) | COMMON_SECONDARY_CE | ((ce32 & 0xff0000) >> 8);
3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Get the second of the two Latin-expansion CEs encoded in ce32.
3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see LATIN_EXPANSION_TAG
3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long latinCE1FromCE32(int ce32) {
3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (((long)ce32 & 0xff00) << 16) | COMMON_TERTIARY_CE;
3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the data index from a special CE32.
4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static int indexFromCE32(int ce32) {
4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ce32 >>> 13;
4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the data length from a ce32.
4087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static int lengthFromCE32(int ce32) {
4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (ce32 >> 8) & 31;
4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the digit value from a DIGIT_TAG ce32.
4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static char digitFromCE32(int ce32) {
4177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (char)((ce32 >> 8) & 0xf);
4187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Returns a 64-bit CE from a simple CE32 (not special). */
4217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long ceFromSimpleCE32(int ce32) {
4227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // normal form ppppsstt -> pppp0000ss00tt00
4237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert (ce32 & 0xff) < SPECIAL_CE32_LOW_BYTE;
4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ((long)(ce32 & 0xffff0000) << 32) | ((long)(ce32 & 0xff00) << 16) | ((ce32 & 0xff) << 8);
4257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Returns a 64-bit CE from a simple/long-primary/long-secondary CE32. */
4287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long ceFromCE32(int ce32) {
4297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int tertiary = ce32 & 0xff;
4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(tertiary < SPECIAL_CE32_LOW_BYTE) {
4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // normal form ppppsstt -> pppp0000ss00tt00
4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return ((long)(ce32 & 0xffff0000) << 32) | ((long)(ce32 & 0xff00) << 16) | (tertiary << 8);
4337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
4347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ce32 -= tertiary;
4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if((tertiary & 0xf) == LONG_PRIMARY_TAG) {
4367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // long-primary form ppppppC1 -> pppppp00050000500
4377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return ((long)ce32 << 32) | COMMON_SEC_AND_TER_CE;
4387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // long-secondary form ssssttC2 -> 00000000sssstt00
4407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                assert (tertiary & 0xf) == LONG_SECONDARY_TAG;
4417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return ce32 & 0xffffffffL;
4427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Creates a CE from a primary weight. */
4477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static long makeCE(long p) {
4487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (p << 32) | COMMON_SEC_AND_TER_CE;
4497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Creates a CE from a primary weight,
4527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * 16-bit secondary/tertiary weights, and a 2-bit quaternary.
4537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long makeCE(long p, int s, int t, int q) {
4557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (p << 32) | ((long)s << 16) | t | (q << 6);
4567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Increments a 2-byte primary by a code point offset.
4607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static long incTwoBytePrimaryByOffset(long basePrimary, boolean isCompressible,
4627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                              int offset) {
4637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Extract the second byte, minus the minimum byte value,
4647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // plus the offset, modulo the number of usable byte values, plus the minimum.
4657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
4667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long primary;
4677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(isCompressible) {
4687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            offset += ((int)(basePrimary >> 16) & 0xff) - 4;
4697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            primary = ((offset % 251) + 4) << 16;
4707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            offset /= 251;
4717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
4727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            offset += ((int)(basePrimary >> 16) & 0xff) - 2;
4737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            primary = ((offset % 254) + 2) << 16;
4747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            offset /= 254;
4757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // First byte, assume no further overflow.
4777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return primary | ((basePrimary & 0xff000000L) + ((long)offset << 24));
4787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Increments a 3-byte primary by a code point offset.
4827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static long incThreeBytePrimaryByOffset(long basePrimary, boolean isCompressible,
4847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                                int offset) {
4857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Extract the third byte, minus the minimum byte value,
4867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // plus the offset, modulo the number of usable byte values, plus the minimum.
4877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        offset += ((int)(basePrimary >> 8) & 0xff) - 2;
4887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long primary = ((offset % 254) + 2) << 8;
4897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        offset /= 254;
4907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Same with the second byte,
4917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
4927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(isCompressible) {
4937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            offset += ((int)(basePrimary >> 16) & 0xff) - 4;
4947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            primary |= ((offset % 251) + 4) << 16;
4957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            offset /= 251;
4967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
4977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            offset += ((int)(basePrimary >> 16) & 0xff) - 2;
4987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            primary |= ((offset % 254) + 2) << 16;
4997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            offset /= 254;
5007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // First byte, assume no further overflow.
5027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return primary | ((basePrimary & 0xff000000L) + ((long)offset << 24));
5037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Decrements a 2-byte primary by one range step (1..0x7f).
5077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long decTwoBytePrimaryByOneStep(long basePrimary, boolean isCompressible, int step) {
5097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Extract the second byte, minus the minimum byte value,
5107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // minus the step, modulo the number of usable byte values, plus the minimum.
5117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
5127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Assume no further underflow for the first byte.
5137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(0 < step && step <= 0x7f);
5147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int byte2 = ((int)(basePrimary >> 16) & 0xff) - step;
5157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(isCompressible) {
5167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(byte2 < 4) {
5177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                byte2 += 251;
5187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                basePrimary -= 0x1000000;
5197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
5217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(byte2 < 2) {
5227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                byte2 += 254;
5237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                basePrimary -= 0x1000000;
5247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (basePrimary & 0xff000000L) | (byte2 << 16);
5277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Decrements a 3-byte primary by one range step (1..0x7f).
5317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long decThreeBytePrimaryByOneStep(long basePrimary, boolean isCompressible, int step) {
5337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Extract the third byte, minus the minimum byte value,
5347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // minus the step, modulo the number of usable byte values, plus the minimum.
5357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(0 < step && step <= 0x7f);
5367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int byte3 = ((int)(basePrimary >> 8) & 0xff) - step;
5377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(byte3 >= 2) {
5387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (basePrimary & 0xffff0000L) | (byte3 << 8);
5397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        byte3 += 254;
5417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Same with the second byte,
5427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
5437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int byte2 = ((int)(basePrimary >> 16) & 0xff) - 1;
5447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(isCompressible) {
5457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(byte2 < 4) {
5467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                byte2 = 0xfe;
5477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                basePrimary -= 0x1000000;
5487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
5507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(byte2 < 2) {
5517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                byte2 = 0xff;
5527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                basePrimary -= 0x1000000;
5537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // First byte, assume no further underflow.
5567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (basePrimary & 0xff000000L) | (byte2 << 16) | (byte3 << 8);
5577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Computes a 3-byte primary for c's OFFSET_TAG data "CE".
5617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long getThreeBytePrimaryForOffsetData(int c, long dataCE) {
5637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long p = dataCE >>> 32;  // three-byte primary pppppp00
5647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int lower32 = (int)dataCE;  // base code point b & step s: bbbbbbss (bit 7: isCompressible)
5657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int offset = (c - (lower32 >> 8)) * (lower32 & 0x7f);  // delta * increment
5667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean isCompressible = (lower32 & 0x80) != 0;
5677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return Collation.incThreeBytePrimaryByOffset(p, isCompressible, offset);
5687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the unassigned-character implicit primary weight for any valid code point c.
5727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long unassignedPrimaryFromCodePoint(int c) {
5747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Create a gap before U+0000. Use c=-1 for [first unassigned].
5757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ++c;
5767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Fourth byte: 18 values, every 14th byte value (gap of 13).
5777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long primary = 2 + (c % 18) * 14;
5787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        c /= 18;
5797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Third byte: 254 values.
5807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        primary |= (2 + (c % 254)) << 8;
5817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        c /= 254;
5827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Second byte: 251 values 04..FE excluding the primary compression bytes.
5837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        primary |= (4 + (c % 251)) << 16;
5847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // One lead byte covers all code points (c < 0x1182B4 = 1*251*254*18).
5857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return primary | ((long)UNASSIGNED_IMPLICIT_BYTE << 24);
5867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long unassignedCEFromCodePoint(int c) {
5897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return makeCE(unassignedPrimaryFromCodePoint(c));
5907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // private Collation()  // No instantiation.
5937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
594