17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/*
27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*******************************************************************************
3f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert* Copyright (C) 2010-2015, International Business Machines
47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Corporation and others.  All Rights Reserved.
57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*******************************************************************************
67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Collation.java, ported from collation.h/.cpp
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* C++ version created on: 2010oct27
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* created by: Markus W. Scherer
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*/
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.impl.coll;
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/**
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Collation v2 basic definitions and static helper functions.
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Data structures except for expansion tables store 32-bit CEs which are
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * either specials (see tags below) or are compact forms of 64-bit CEs.
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic final class Collation {
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** UChar32 U_SENTINEL.
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * TODO: Create a common, public constant?
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int SENTINEL_CP = -1;
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // ICU4C compare() API returns enum UCollationResult values (with UCOL_ prefix).
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // ICU4J just returns int. We use these constants for ease of porting.
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int LESS = -1;
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int EQUAL = 0;
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int GREATER = 1;
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // Special sort key bytes for all levels.
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int TERMINATOR_BYTE = 0;
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int LEVEL_SEPARATOR_BYTE = 1;
35f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert
36f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert    /** The secondary/tertiary lower limit for tailoring before any root elements. */
37f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert    static final int BEFORE_WEIGHT16 = 0x100;
38f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Merge-sort-key separator.
41f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     * Same as the unique primary and identical-level weights of U+FFFE.
42f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert     * Must not be used as primary compression low terminator.
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Otherwise usable.
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int MERGE_SEPARATOR_BYTE = 2;
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final long MERGE_SEPARATOR_PRIMARY = 0x02000000;  // U+FFFE
47f716bda031dccdec5e47bb40e758c5901d209729Fredrik Roubert    static final int MERGE_SEPARATOR_CE32 = 0x02000505;  // U+FFFE
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Primary compression low terminator, must be greater than MERGE_SEPARATOR_BYTE.
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Reserved value in primary second byte if the lead byte is compressible.
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Otherwise usable in all CE weight bytes.
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int PRIMARY_COMPRESSION_LOW_BYTE = 3;
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Primary compression high terminator.
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Reserved value in primary second byte if the lead byte is compressible.
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Otherwise usable in all CE weight bytes.
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int PRIMARY_COMPRESSION_HIGH_BYTE = 0xff;
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Default secondary/tertiary weight lead byte. */
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int COMMON_BYTE = 5;
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int COMMON_WEIGHT16 = 0x0500;
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Middle 16 bits of a CE with a common secondary weight. */
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int COMMON_SECONDARY_CE = 0x05000000;
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Lower 16 bits of a CE with a common tertiary weight. */
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int COMMON_TERTIARY_CE = 0x0500;
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Lower 32 bits of a CE with common secondary and tertiary weights. */
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int COMMON_SEC_AND_TER_CE = 0x05000500;
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int SECONDARY_MASK = 0xffff0000;
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int CASE_MASK = 0xc000;
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int SECONDARY_AND_CASE_MASK = SECONDARY_MASK | CASE_MASK;
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Only the 2*6 bits for the pure tertiary weight. */
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int ONLY_TERTIARY_MASK = 0x3f3f;
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Only the secondary & tertiary bits; no case, no quaternary. */
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int ONLY_SEC_TER_MASK = SECONDARY_MASK | ONLY_TERTIARY_MASK;
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Case bits and tertiary bits. */
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int CASE_AND_TERTIARY_MASK = CASE_MASK | ONLY_TERTIARY_MASK;
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int QUATERNARY_MASK = 0xc0;
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Case bits and quaternary bits. */
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int CASE_AND_QUATERNARY_MASK = CASE_MASK | QUATERNARY_MASK;
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int UNASSIGNED_IMPLICIT_BYTE = 0xfe;  // compressible
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * First unassigned: AlphabeticIndex overflow boundary.
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * We want a 3-byte primary so that it fits into the root elements table.
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This 3-byte primary will not collide with
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * any unassigned-implicit 4-byte primaries because
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the first few hundred Unicode code points all have real mappings.
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final long FIRST_UNASSIGNED_PRIMARY = 0xfe040200L;
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int TRAIL_WEIGHT_BYTE = 0xff;  // not compressible
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final long FIRST_TRAILING_PRIMARY = 0xff020200L;  // [first trailing]
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final long MAX_PRIMARY = 0xffff0000L;  // U+FFFF
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int MAX_REGULAR_CE32 = 0xffff0505;  // U+FFFF
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // CE32 value for U+FFFD as well as illegal UTF-8 byte sequences (which behave like U+FFFD).
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // We use the third-highest primary weight for U+FFFD (as in UCA 6.3+).
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final long FFFD_PRIMARY = MAX_PRIMARY - 0x20000;
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int FFFD_CE32 = MAX_REGULAR_CE32 - 0x20000;
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * A CE32 is special if its low byte is this or greater.
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Impossible case bits 11 mark special CE32s.
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This value itself is used to indicate a fallback to the base collator.
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int SPECIAL_CE32_LOW_BYTE = 0xc0;
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int FALLBACK_CE32 = SPECIAL_CE32_LOW_BYTE;
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Low byte of a long-primary special CE32.
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int LONG_PRIMARY_CE32_LOW_BYTE = 0xc1;  // SPECIAL_CE32_LOW_BYTE | LONG_PRIMARY_TAG
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int UNASSIGNED_CE32 = 0xffffffff;  // Compute an unassigned-implicit CE.
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int NO_CE32 = 1;
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** No CE: End of input. Only used in runtime code, not stored in data. */
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final long NO_CE_PRIMARY = 1;  // not a left-adjusted weight
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int NO_CE_WEIGHT16 = 0x0100;  // weight of LEVEL_SEPARATOR_BYTE
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final long NO_CE = 0x101000100L;  // NO_CE_PRIMARY, NO_CE_WEIGHT16, NO_CE_WEIGHT16
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Sort key levels. */
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Unspecified level. */
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int NO_LEVEL = 0;
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int PRIMARY_LEVEL = 1;
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int SECONDARY_LEVEL = 2;
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int CASE_LEVEL = 3;
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int TERTIARY_LEVEL = 4;
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int QUATERNARY_LEVEL = 5;
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int IDENTICAL_LEVEL = 6;
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Beyond sort key bytes. */
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static final int ZERO_LEVEL = 7;
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Sort key level flags: xx_FLAG = 1 << xx_LEVEL.
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * In Java, use enum Level with flag() getters, or use EnumSet rather than hand-made bit sets.
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int NO_LEVEL_FLAG = 1;
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int PRIMARY_LEVEL_FLAG = 2;
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int SECONDARY_LEVEL_FLAG = 4;
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int CASE_LEVEL_FLAG = 8;
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int TERTIARY_LEVEL_FLAG = 0x10;
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int QUATERNARY_LEVEL_FLAG = 0x20;
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int IDENTICAL_LEVEL_FLAG = 0x40;
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int ZERO_LEVEL_FLAG = 0x80;
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Special-CE32 tags, from bits 3..0 of a special 32-bit CE.
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..8 are available for tag-specific data.
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits  5..4: Reserved. May be used in the future to indicate lccc!=0 and tccc!=0.
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Fall back to the base collator.
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This is the tag value in SPECIAL_CE32_LOW_BYTE and FALLBACK_CE32.
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..8: Unused, 0.
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int FALLBACK_TAG = 0;
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Long-primary CE with COMMON_SEC_AND_TER_CE.
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..8: Three-byte primary.
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int LONG_PRIMARY_TAG = 1;
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Long-secondary CE with zero primary.
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..16: Secondary weight.
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 15.. 8: Tertiary weight.
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int LONG_SECONDARY_TAG = 2;
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Unused.
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * May be used in the future for single-byte secondary CEs (SHORT_SECONDARY_TAG),
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * storing the secondary in bits 31..24, the ccc in bits 23..16,
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * and the tertiary in bits 15..8.
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int RESERVED_TAG_3 = 3;
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Latin mini expansions of two simple CEs [pp, 05, tt] [00, ss, 05].
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..24: Single-byte primary weight pp of the first CE.
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 23..16: Tertiary weight tt of the first CE.
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 15.. 8: Secondary weight ss of the second CE.
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int LATIN_EXPANSION_TAG = 4;
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Points to one or more simple/long-primary/long-secondary 32-bit CE32s.
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..13: Index into int table.
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 12.. 8: Length=1..31.
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int EXPANSION32_TAG = 5;
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Points to one or more 64-bit CEs.
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..13: Index into CE table.
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 12.. 8: Length=1..31.
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int EXPANSION_TAG = 6;
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Builder data, used only in the CollationDataBuilder, not in runtime data.
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If bit 8 is 0: Builder context, points to a list of context-sensitive mappings.
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..13: Index to the builder's list of ConditionalCE32 for this character.
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 12.. 9: Unused, 0.
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * If bit 8 is 1 (IS_BUILDER_JAMO_CE32): Builder-only jamoCE32 value.
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The builder fetches the Jamo CE32 from the trie.
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..13: Jamo code point.
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 12.. 9: Unused, 0.
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int BUILDER_DATA_TAG = 7;
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Points to prefix trie.
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..13: Index into prefix/contraction data.
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 12.. 8: Unused, 0.
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int PREFIX_TAG = 8;
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Points to contraction data.
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..13: Index into prefix/contraction data.
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 12..11: Unused, 0.
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bit      10: CONTRACT_TRAILING_CCC flag.
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bit       9: CONTRACT_NEXT_CCC flag.
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bit       8: CONTRACT_SINGLE_CP_NO_MATCH flag.
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int CONTRACTION_TAG = 9;
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Decimal digit.
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..13: Index into int table for non-numeric-collation CE32.
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bit      12: Unused, 0.
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 11.. 8: Digit value 0..9.
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int DIGIT_TAG = 10;
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Tag for U+0000, for moving the NUL-termination handling
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * from the regular fastpath into specials-handling code.
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..8: Unused, 0.
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int U0000_TAG = 11;
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Tag for a Hangul syllable.
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..9: Unused, 0.
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bit      8: HANGUL_NO_SPECIAL_JAMO flag.
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int HANGUL_TAG = 12;
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Tag for a lead surrogate code unit.
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Optional optimization for UTF-16 string processing.
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..10: Unused, 0.
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *       9.. 8: =0: All associated supplementary code points are unassigned-implict.
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *              =1: All associated supplementary code points fall back to the base data.
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *              else: (Normally 2) Look up the data for the supplementary code point.
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int LEAD_SURROGATE_TAG = 13;
2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Tag for CEs with primary weights in code point order.
2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 31..13: Index into CE table, for one data "CE".
2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 12.. 8: Unused, 0.
2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This data "CE" has the following bit fields:
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Bits 63..32: Three-byte primary pppppp00.
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *      31.. 8: Start/base code point of the in-order range.
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *           7: Flag isCompressible primary.
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *       6.. 0: Per-code point primary-weight increment.
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int OFFSET_TAG = 14;
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Implicit CE tag. Compute an unassigned-implicit CE.
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * All bits are set (UNASSIGNED_CE32=0xffffffff).
2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int IMPLICIT_TAG = 15;
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static boolean isAssignedCE32(int ce32) {
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ce32 != FALLBACK_CE32 && ce32 != UNASSIGNED_CE32;
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * We limit the number of CEs in an expansion
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * so that we can use a small number of length bits in the data structure,
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * and so that an implementation can copy CEs at runtime without growing a destination buffer.
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int MAX_EXPANSION_LENGTH = 31;
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int MAX_INDEX = 0x7ffff;
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Set if there is no match for the single (no-suffix) character itself.
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * This is only possible if there is a prefix.
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * In this case, discontiguous contraction matching cannot add combining marks
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * starting from an empty suffix.
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * The default CE32 is used anyway if there is no suffix match.
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int CONTRACT_SINGLE_CP_NO_MATCH = 0x100;
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Set if the first character of every contraction suffix has lccc!=0. */
2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int CONTRACT_NEXT_CCC = 0x200;
2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Set if any contraction suffix ends with lccc!=0. */
2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int CONTRACT_TRAILING_CCC = 0x400;
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** For HANGUL_TAG: None of its Jamo CE32s isSpecialCE32(). */
3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int HANGUL_NO_SPECIAL_JAMO = 0x100;
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int LEAD_ALL_UNASSIGNED = 0;
3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int LEAD_ALL_FALLBACK = 0x100;
3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int LEAD_MIXED = 0x200;
3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static final int LEAD_TYPE_MASK = 0x300;
3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static int makeLongPrimaryCE32(long p) { return (int)(p | LONG_PRIMARY_CE32_LOW_BYTE); }
3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Turns the long-primary CE32 into a primary weight pppppp00. */
3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long primaryFromLongPrimaryCE32(int ce32) {
3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (long)ce32 & 0xffffff00L;
3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long ceFromLongPrimaryCE32(int ce32) {
3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ((long)(ce32 & 0xffffff00) << 32) | COMMON_SEC_AND_TER_CE;
3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static int makeLongSecondaryCE32(int lower32) {
3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return lower32 | SPECIAL_CE32_LOW_BYTE | LONG_SECONDARY_TAG;
3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long ceFromLongSecondaryCE32(int ce32) {
3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (long)ce32 & 0xffffff00L;
3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Makes a special CE32 with tag, index and length. */
3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static int makeCE32FromTagIndexAndLength(int tag, int index, int length) {
3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (index << 13) | (length << 8) | SPECIAL_CE32_LOW_BYTE | tag;
3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Makes a special CE32 with only tag and index. */
3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static int makeCE32FromTagAndIndex(int tag, int index) {
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (index << 13) | SPECIAL_CE32_LOW_BYTE | tag;
3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static boolean isSpecialCE32(int ce32) {
3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (ce32 & 0xff) >= SPECIAL_CE32_LOW_BYTE;
3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static int tagFromCE32(int ce32) {
3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ce32 & 0xf;
3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static boolean hasCE32Tag(int ce32, int tag) {
3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return isSpecialCE32(ce32) && tagFromCE32(ce32) == tag;
3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static boolean isLongPrimaryCE32(int ce32) {
3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return hasCE32Tag(ce32, LONG_PRIMARY_TAG);
3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static boolean isSimpleOrLongCE32(int ce32) {
3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return !isSpecialCE32(ce32) ||
3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                tagFromCE32(ce32) == LONG_PRIMARY_TAG ||
3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                tagFromCE32(ce32) == LONG_SECONDARY_TAG;
3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @return true if the ce32 yields one or more CEs without further data lookups
3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static boolean isSelfContainedCE32(int ce32) {
3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return !isSpecialCE32(ce32) ||
3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                tagFromCE32(ce32) == LONG_PRIMARY_TAG ||
3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                tagFromCE32(ce32) == LONG_SECONDARY_TAG ||
3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                tagFromCE32(ce32) == LATIN_EXPANSION_TAG;
3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static boolean isPrefixCE32(int ce32) {
3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return hasCE32Tag(ce32, PREFIX_TAG);
3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static boolean isContractionCE32(int ce32) {
3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return hasCE32Tag(ce32, CONTRACTION_TAG);
3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static boolean ce32HasContext(int ce32) {
3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return isSpecialCE32(ce32) &&
3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                (tagFromCE32(ce32) == PREFIX_TAG ||
3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                tagFromCE32(ce32) == CONTRACTION_TAG);
3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Get the first of the two Latin-expansion CEs encoded in ce32.
3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see LATIN_EXPANSION_TAG
3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long latinCE0FromCE32(int ce32) {
3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ((long)(ce32 & 0xff000000) << 32) | COMMON_SECONDARY_CE | ((ce32 & 0xff0000) >> 8);
3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Get the second of the two Latin-expansion CEs encoded in ce32.
3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @see LATIN_EXPANSION_TAG
3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long latinCE1FromCE32(int ce32) {
3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (((long)ce32 & 0xff00) << 16) | COMMON_TERTIARY_CE;
3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the data index from a special CE32.
3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static int indexFromCE32(int ce32) {
4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ce32 >>> 13;
4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the data length from a ce32.
4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static int lengthFromCE32(int ce32) {
4087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (ce32 >> 8) & 31;
4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the digit value from a DIGIT_TAG ce32.
4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static char digitFromCE32(int ce32) {
4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (char)((ce32 >> 8) & 0xf);
4167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Returns a 64-bit CE from a simple CE32 (not special). */
4197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long ceFromSimpleCE32(int ce32) {
4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // normal form ppppsstt -> pppp0000ss00tt00
4217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert (ce32 & 0xff) < SPECIAL_CE32_LOW_BYTE;
4227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return ((long)(ce32 & 0xffff0000) << 32) | ((long)(ce32 & 0xff00) << 16) | ((ce32 & 0xff) << 8);
4237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Returns a 64-bit CE from a simple/long-primary/long-secondary CE32. */
4267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long ceFromCE32(int ce32) {
4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int tertiary = ce32 & 0xff;
4287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(tertiary < SPECIAL_CE32_LOW_BYTE) {
4297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // normal form ppppsstt -> pppp0000ss00tt00
4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return ((long)(ce32 & 0xffff0000) << 32) | ((long)(ce32 & 0xff00) << 16) | (tertiary << 8);
4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ce32 -= tertiary;
4337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if((tertiary & 0xf) == LONG_PRIMARY_TAG) {
4347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // long-primary form ppppppC1 -> pppppp00050000500
4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return ((long)ce32 << 32) | COMMON_SEC_AND_TER_CE;
4367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
4377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // long-secondary form ssssttC2 -> 00000000sssstt00
4387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                assert (tertiary & 0xf) == LONG_SECONDARY_TAG;
4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return ce32 & 0xffffffffL;
4407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /** Creates a CE from a primary weight. */
4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static long makeCE(long p) {
4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (p << 32) | COMMON_SEC_AND_TER_CE;
4477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Creates a CE from a primary weight,
4507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * 16-bit secondary/tertiary weights, and a 2-bit quaternary.
4517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long makeCE(long p, int s, int t, int q) {
4537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (p << 32) | ((long)s << 16) | t | (q << 6);
4547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Increments a 2-byte primary by a code point offset.
4587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static long incTwoBytePrimaryByOffset(long basePrimary, boolean isCompressible,
4607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                              int offset) {
4617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Extract the second byte, minus the minimum byte value,
4627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // plus the offset, modulo the number of usable byte values, plus the minimum.
4637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
4647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long primary;
4657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(isCompressible) {
4667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            offset += ((int)(basePrimary >> 16) & 0xff) - 4;
4677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            primary = ((offset % 251) + 4) << 16;
4687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            offset /= 251;
4697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
4707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            offset += ((int)(basePrimary >> 16) & 0xff) - 2;
4717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            primary = ((offset % 254) + 2) << 16;
4727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            offset /= 254;
4737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // First byte, assume no further overflow.
4757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return primary | ((basePrimary & 0xff000000L) + ((long)offset << 24));
4767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Increments a 3-byte primary by a code point offset.
4807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
4817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static long incThreeBytePrimaryByOffset(long basePrimary, boolean isCompressible,
4827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                                int offset) {
4837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Extract the third byte, minus the minimum byte value,
4847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // plus the offset, modulo the number of usable byte values, plus the minimum.
4857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        offset += ((int)(basePrimary >> 8) & 0xff) - 2;
4867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long primary = ((offset % 254) + 2) << 8;
4877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        offset /= 254;
4887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Same with the second byte,
4897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
4907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(isCompressible) {
4917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            offset += ((int)(basePrimary >> 16) & 0xff) - 4;
4927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            primary |= ((offset % 251) + 4) << 16;
4937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            offset /= 251;
4947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
4957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            offset += ((int)(basePrimary >> 16) & 0xff) - 2;
4967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            primary |= ((offset % 254) + 2) << 16;
4977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            offset /= 254;
4987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // First byte, assume no further overflow.
5007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return primary | ((basePrimary & 0xff000000L) + ((long)offset << 24));
5017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Decrements a 2-byte primary by one range step (1..0x7f).
5057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long decTwoBytePrimaryByOneStep(long basePrimary, boolean isCompressible, int step) {
5077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Extract the second byte, minus the minimum byte value,
5087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // minus the step, modulo the number of usable byte values, plus the minimum.
5097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
5107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Assume no further underflow for the first byte.
5117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(0 < step && step <= 0x7f);
5127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int byte2 = ((int)(basePrimary >> 16) & 0xff) - step;
5137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(isCompressible) {
5147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(byte2 < 4) {
5157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                byte2 += 251;
5167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                basePrimary -= 0x1000000;
5177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
5197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(byte2 < 2) {
5207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                byte2 += 254;
5217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                basePrimary -= 0x1000000;
5227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (basePrimary & 0xff000000L) | (byte2 << 16);
5257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Decrements a 3-byte primary by one range step (1..0x7f).
5297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long decThreeBytePrimaryByOneStep(long basePrimary, boolean isCompressible, int step) {
5317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Extract the third byte, minus the minimum byte value,
5327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // minus the step, modulo the number of usable byte values, plus the minimum.
5337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        assert(0 < step && step <= 0x7f);
5347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int byte3 = ((int)(basePrimary >> 8) & 0xff) - step;
5357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(byte3 >= 2) {
5367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return (basePrimary & 0xffff0000L) | (byte3 << 8);
5377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        byte3 += 254;
5397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Same with the second byte,
5407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
5417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int byte2 = ((int)(basePrimary >> 16) & 0xff) - 1;
5427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if(isCompressible) {
5437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(byte2 < 4) {
5447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                byte2 = 0xfe;
5457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                basePrimary -= 0x1000000;
5467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        } else {
5487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if(byte2 < 2) {
5497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                byte2 = 0xff;
5507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                basePrimary -= 0x1000000;
5517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // First byte, assume no further underflow.
5547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return (basePrimary & 0xff000000L) | (byte2 << 16) | (byte3 << 8);
5557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Computes a 3-byte primary for c's OFFSET_TAG data "CE".
5597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long getThreeBytePrimaryForOffsetData(int c, long dataCE) {
5617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long p = dataCE >>> 32;  // three-byte primary pppppp00
5627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int lower32 = (int)dataCE;  // base code point b & step s: bbbbbbss (bit 7: isCompressible)
5637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int offset = (c - (lower32 >> 8)) * (lower32 & 0x7f);  // delta * increment
5647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        boolean isCompressible = (lower32 & 0x80) != 0;
5657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return Collation.incThreeBytePrimaryByOffset(p, isCompressible, offset);
5667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
5697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Returns the unassigned-character implicit primary weight for any valid code point c.
5707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long unassignedPrimaryFromCodePoint(int c) {
5727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Create a gap before U+0000. Use c=-1 for [first unassigned].
5737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        ++c;
5747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Fourth byte: 18 values, every 14th byte value (gap of 13).
5757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        long primary = 2 + (c % 18) * 14;
5767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        c /= 18;
5777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Third byte: 254 values.
5787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        primary |= (2 + (c % 254)) << 8;
5797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        c /= 254;
5807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Second byte: 251 values 04..FE excluding the primary compression bytes.
5817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        primary |= (4 + (c % 251)) << 16;
5827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // One lead byte covers all code points (c < 0x1182B4 = 1*251*254*18).
5837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return primary | ((long)UNASSIGNED_IMPLICIT_BYTE << 24);
5847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static long unassignedCEFromCodePoint(int c) {
5877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return makeCE(unassignedPrimaryFromCodePoint(c));
5887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    // private Collation()  // No instantiation.
5917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
592