1fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/*
2fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*******************************************************************************
31b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert* Copyright (C) 2010-2015, International Business Machines
4fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Corporation and others.  All Rights Reserved.
5fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*******************************************************************************
6fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* collation.h
7fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*
8fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created on: 2010oct27
9fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created by: Markus W. Scherer
10fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*/
11fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
12fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#ifndef __COLLATION_H__
13fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#define __COLLATION_H__
14fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
15fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utypes.h"
16fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION
18fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
19fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_BEGIN
20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/**
22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Collation v2 basic definitions and static helper functions.
23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius *
24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Data structures except for expansion tables store 32-bit CEs which are
25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * either specials (see tags below) or are compact forms of 64-bit CEs.
26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */
27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass U_I18N_API Collation {
28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic:
    // Special sort key bytes for all levels.
    // NOTE(review): the per-constant roles below are read off the constant names only;
    // confirm against the sort key writer.
    /** Byte value 0; by name, the byte that terminates a sort key. */
    static const uint8_t TERMINATOR_BYTE = 0;
    /** Byte value 1; by name, the byte that separates sort key levels. */
    static const uint8_t LEVEL_SEPARATOR_BYTE = 1;

    /** The secondary/tertiary lower limit for tailoring before any root elements. */
    static const uint32_t BEFORE_WEIGHT16 = 0x0100;

    /**
     * Merge-sort-key separator.
     * Same as the unique primary and identical-level weights of U+FFFE.
     * Must not be used as primary compression low terminator.
     * Otherwise usable.
     */
    static const uint8_t MERGE_SEPARATOR_BYTE = 2;
    /** The MERGE_SEPARATOR_BYTE value (02) as the lead byte of a 32-bit primary weight. */
    static const uint32_t MERGE_SEPARATOR_PRIMARY = 0x02000000;  // U+FFFE
    /**
     * The MERGE_SEPARATOR_BYTE value (02) in the top byte,
     * with 05 (the COMMON_BYTE value) in each of the two low bytes.
     */
    static const uint32_t MERGE_SEPARATOR_CE32 = 0x02000505;  // U+FFFE
45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
    /**
     * Primary compression low terminator, must be greater than MERGE_SEPARATOR_BYTE.
     * Reserved value in primary second byte if the lead byte is compressible.
     * Otherwise usable in all CE weight bytes.
     *
     * The value 3 is the smallest byte that satisfies the "greater than
     * MERGE_SEPARATOR_BYTE" (=2) requirement.
     */
    static const uint8_t PRIMARY_COMPRESSION_LOW_BYTE = 3;
    /**
     * Primary compression high terminator.
     * Reserved value in primary second byte if the lead byte is compressible.
     * Otherwise usable in all CE weight bytes.
     *
     * 0xff is the largest possible byte value.
     */
    static const uint8_t PRIMARY_COMPRESSION_HIGH_BYTE = 0xff;
58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
    /** Default secondary/tertiary weight lead byte. */
    static const uint8_t COMMON_BYTE = 5;
    /** The COMMON_BYTE value (05) as the lead byte of a 16-bit weight. */
    static const uint32_t COMMON_WEIGHT16 = 0x0500;
    /** Middle 16 bits of a CE with a common secondary weight. */
    // Same 16-bit value as COMMON_WEIGHT16, placed in bits 31..16.
    static const uint32_t COMMON_SECONDARY_CE = 0x05000000;
    /** Lower 16 bits of a CE with a common tertiary weight. */
    // Same 16-bit value as COMMON_WEIGHT16, placed in bits 15..0.
    static const uint32_t COMMON_TERTIARY_CE = 0x0500;
    /** Lower 32 bits of a CE with common secondary and tertiary weights. */
    // Equals COMMON_SECONDARY_CE | COMMON_TERTIARY_CE.
    static const uint32_t COMMON_SEC_AND_TER_CE = 0x05000500;
68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
    /** Bits 31..16: the position that COMMON_SECONDARY_CE uses for the secondary weight. */
    static const uint32_t SECONDARY_MASK = 0xffff0000;
    /** Bits 15..14: the two bits of the upper tertiary byte that ONLY_TERTIARY_MASK leaves out. */
    static const uint32_t CASE_MASK = 0xc000;
    /** Secondary bits and case bits (evaluates to 0xffffc000). */
    static const uint32_t SECONDARY_AND_CASE_MASK = SECONDARY_MASK | CASE_MASK;
    /** Only the 2*6 bits for the pure tertiary weight. */
    static const uint32_t ONLY_TERTIARY_MASK = 0x3f3f;
    /** Only the secondary & tertiary bits; no case, no quaternary. */
    // Evaluates to 0xffff3f3f.
    static const uint32_t ONLY_SEC_TER_MASK = SECONDARY_MASK | ONLY_TERTIARY_MASK;
    /** Case bits and tertiary bits. */
    // Evaluates to 0xff3f.
    static const uint32_t CASE_AND_TERTIARY_MASK = CASE_MASK | ONLY_TERTIARY_MASK;
    /** Bits 7..6: the two bits of the lower tertiary byte that ONLY_TERTIARY_MASK leaves out. */
    static const uint32_t QUATERNARY_MASK = 0xc0;
    /** Case bits and quaternary bits. */
    // Evaluates to 0xc0c0.
    static const uint32_t CASE_AND_QUATERNARY_MASK = CASE_MASK | QUATERNARY_MASK;
81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
    /**
     * Lead byte 0xfe; FIRST_UNASSIGNED_PRIMARY below starts with this byte.
     * NOTE(review): by name, the lead byte of unassigned-implicit primaries -- confirm.
     */
    static const uint8_t UNASSIGNED_IMPLICIT_BYTE = 0xfe;  // compressible
    /**
     * First unassigned: AlphabeticIndex overflow boundary.
     * We want a 3-byte primary so that it fits into the root elements table.
     *
     * This 3-byte primary will not collide with
     * any unassigned-implicit 4-byte primaries because
     * the first few hundred Unicode code points all have real mappings.
     */
    static const uint32_t FIRST_UNASSIGNED_PRIMARY = 0xfe040200;

    /** Lead byte 0xff; FIRST_TRAILING_PRIMARY and MAX_PRIMARY below start with this byte. */
    static const uint8_t TRAIL_WEIGHT_BYTE = 0xff;  // not compressible
    static const uint32_t FIRST_TRAILING_PRIMARY = 0xff020200;  // [first trailing]
    static const uint32_t MAX_PRIMARY = 0xffff0000;  // U+FFFF
    /** The two lead bytes of MAX_PRIMARY with 05 (the COMMON_BYTE value) in each of the two low bytes. */
    static const uint32_t MAX_REGULAR_CE32 = 0xffff0505;  // U+FFFF

    // CE32 value for U+FFFD as well as illegal UTF-8 byte sequences (which behave like U+FFFD).
    // We use the third-highest primary weight for U+FFFD (as in UCA 6.3+).
    // Subtracting 0x20000 lowers the second byte by 2:
    // FFFD_PRIMARY evaluates to 0xfffd0000 and FFFD_CE32 to 0xfffd0505.
    static const uint32_t FFFD_PRIMARY = MAX_PRIMARY - 0x20000;
    static const uint32_t FFFD_CE32 = MAX_REGULAR_CE32 - 0x20000;
102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
    /**
     * A CE32 is special if its low byte is this or greater.
     * Impossible case bits 11 mark special CE32s.
     * This value itself is used to indicate a fallback to the base collator.
     */
    static const uint8_t SPECIAL_CE32_LOW_BYTE = 0xc0;
    /**
     * The special CE32 whose low byte is SPECIAL_CE32_LOW_BYTE and whose other bits are 0,
     * that is, tag bits 3..0 are 0 (FALLBACK_TAG) with no tag-specific data.
     */
    static const uint32_t FALLBACK_CE32 = SPECIAL_CE32_LOW_BYTE;
    /**
     * Low byte of a long-primary special CE32.
     */
    static const uint8_t LONG_PRIMARY_CE32_LOW_BYTE = 0xc1;  // SPECIAL_CE32_LOW_BYTE | LONG_PRIMARY_TAG

    /** All 32 bits set; the tag bits 3..0 are therefore 15 (IMPLICIT_TAG). */
    static const uint32_t UNASSIGNED_CE32 = 0xffffffff;  // Compute an unassigned-implicit CE.

    // NOTE(review): undocumented in this header. Its low byte (01) is below
    // SPECIAL_CE32_LOW_BYTE, so isSpecialCE32() is false for it; presumably a
    // "no CE32" marker analogous to NO_CE below -- confirm against its users.
    static const uint32_t NO_CE32 = 1;

    /** No CE: End of input. Only used in runtime code, not stored in data. */
    static const uint32_t NO_CE_PRIMARY = 1;  // not a left-adjusted weight
    static const uint32_t NO_CE_WEIGHT16 = 0x0100;  // weight of LEVEL_SEPARATOR_BYTE
    // 64-bit layout: bits 63..32 = 0x00000001, bits 31..16 = 0x0100, bits 15..0 = 0x0100.
    static const int64_t NO_CE = INT64_C(0x101000100);  // NO_CE_PRIMARY, NO_CE_WEIGHT16, NO_CE_WEIGHT16
123fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
124fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /** Sort key levels. */
125fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    enum Level {
126fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        /** Unspecified level. */
127fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        NO_LEVEL,
128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        PRIMARY_LEVEL,
129fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        SECONDARY_LEVEL,
130fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        CASE_LEVEL,
131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        TERTIARY_LEVEL,
132fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        QUATERNARY_LEVEL,
133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        IDENTICAL_LEVEL,
134fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        /** Beyond sort key bytes. */
135fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        ZERO_LEVEL
136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    };
137fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
138fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Sort key level flags: xx_FLAG = 1 << xx_LEVEL.
140fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * In Java, use enum Level with flag() getters, or use EnumSet rather than hand-made bit sets.
141fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
142fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static const uint32_t NO_LEVEL_FLAG = 1;
143fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static const uint32_t PRIMARY_LEVEL_FLAG = 2;
144fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static const uint32_t SECONDARY_LEVEL_FLAG = 4;
145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static const uint32_t CASE_LEVEL_FLAG = 8;
146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static const uint32_t TERTIARY_LEVEL_FLAG = 0x10;
147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static const uint32_t QUATERNARY_LEVEL_FLAG = 0x20;
148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static const uint32_t IDENTICAL_LEVEL_FLAG = 0x40;
149fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static const uint32_t ZERO_LEVEL_FLAG = 0x80;
150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
    /**
     * Special-CE32 tags, from bits 3..0 of a special 32-bit CE.
     * Bits 31..8 are available for tag-specific data.
     * Bits  7..6: Both set in CE32s built from SPECIAL_CE32_LOW_BYTE (0xc0).
     * Bits  5..4: Reserved. May be used in the future to indicate lccc!=0 and tccc!=0.
     *
     * The enumerators cover all 16 values (0..15) of the 4-bit tag field.
     */
    enum {
        /**
         * Fall back to the base collator.
         * This is the tag value in SPECIAL_CE32_LOW_BYTE and FALLBACK_CE32.
         * Bits 31..8: Unused, 0.
         */
        FALLBACK_TAG = 0,
        /**
         * Long-primary CE with COMMON_SEC_AND_TER_CE.
         * Bits 31..8: Three-byte primary.
         */
        LONG_PRIMARY_TAG = 1,
        /**
         * Long-secondary CE with zero primary.
         * Bits 31..16: Secondary weight.
         * Bits 15.. 8: Tertiary weight.
         */
        LONG_SECONDARY_TAG = 2,
        /**
         * Unused.
         * May be used in the future for single-byte secondary CEs (SHORT_SECONDARY_TAG),
         * storing the secondary in bits 31..24, the ccc in bits 23..16,
         * and the tertiary in bits 15..8.
         */
        RESERVED_TAG_3 = 3,
        /**
         * Latin mini expansions of two simple CEs [pp, 05, tt] [00, ss, 05].
         * Bits 31..24: Single-byte primary weight pp of the first CE.
         * Bits 23..16: Tertiary weight tt of the first CE.
         * Bits 15.. 8: Secondary weight ss of the second CE.
         */
        LATIN_EXPANSION_TAG = 4,
        /**
         * Points to one or more simple/long-primary/long-secondary 32-bit CE32s.
         * Bits 31..13: Index into uint32_t table.
         * Bits 12.. 8: Length=1..31.
         */
        EXPANSION32_TAG = 5,
        /**
         * Points to one or more 64-bit CEs.
         * Bits 31..13: Index into CE table.
         * Bits 12.. 8: Length=1..31.
         */
        EXPANSION_TAG = 6,
        /**
         * Builder data, used only in the CollationDataBuilder, not in runtime data.
         *
         * If bit 8 is 0: Builder context, points to a list of context-sensitive mappings.
         * Bits 31..13: Index to the builder's list of ConditionalCE32 for this character.
         * Bits 12.. 9: Unused, 0.
         *
         * If bit 8 is 1 (IS_BUILDER_JAMO_CE32): Builder-only jamoCE32 value.
         * The builder fetches the Jamo CE32 from the trie.
         * Bits 31..13: Jamo code point.
         * Bits 12.. 9: Unused, 0.
         */
        BUILDER_DATA_TAG = 7,
        /**
         * Points to prefix trie.
         * Bits 31..13: Index into prefix/contraction data.
         * Bits 12.. 8: Unused, 0.
         */
        PREFIX_TAG = 8,
        /**
         * Points to contraction data.
         * Bits 31..13: Index into prefix/contraction data.
         * Bits 12..11: Unused, 0.
         * Bit      10: CONTRACT_TRAILING_CCC flag.
         * Bit       9: CONTRACT_NEXT_CCC flag.
         * Bit       8: CONTRACT_SINGLE_CP_NO_MATCH flag.
         */
        CONTRACTION_TAG = 9,
        /**
         * Decimal digit.
         * Bits 31..13: Index into uint32_t table for non-numeric-collation CE32.
         * Bit      12: Unused, 0.
         * Bits 11.. 8: Digit value 0..9.
         */
        DIGIT_TAG = 10,
        /**
         * Tag for U+0000, for moving the NUL-termination handling
         * from the regular fastpath into specials-handling code.
         * Bits 31..8: Unused, 0.
         */
        U0000_TAG = 11,
        /**
         * Tag for a Hangul syllable.
         * Bits 31..9: Unused, 0.
         * Bit      8: HANGUL_NO_SPECIAL_JAMO flag.
         */
        HANGUL_TAG = 12,
        /**
         * Tag for a lead surrogate code unit.
         * Optional optimization for UTF-16 string processing.
         * Bits 31..10: Unused, 0.
         *       9.. 8: =0: All associated supplementary code points are unassigned-implicit.
         *              =1: All associated supplementary code points fall back to the base data.
         *              else: (Normally 2) Look up the data for the supplementary code point.
         */
        LEAD_SURROGATE_TAG = 13,
        /**
         * Tag for CEs with primary weights in code point order.
         * Bits 31..13: Index into CE table, for one data "CE".
         * Bits 12.. 8: Unused, 0.
         *
         * This data "CE" has the following bit fields:
         * Bits 63..32: Three-byte primary pppppp00.
         *      31.. 8: Start/base code point of the in-order range.
         *           7: Flag isCompressible primary.
         *       6.. 0: Per-code point primary-weight increment.
         */
        OFFSET_TAG = 14,
        /**
         * Implicit CE tag. Compute an unassigned-implicit CE.
         * All bits are set (UNASSIGNED_CE32=0xffffffff).
         */
        IMPLICIT_TAG = 15
    };
274fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
275fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static UBool isAssignedCE32(uint32_t ce32) {
276fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return ce32 != FALLBACK_CE32 && ce32 != UNASSIGNED_CE32;
277fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
278fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
    /**
     * We limit the number of CEs in an expansion
     * so that we can use a small number of length bits in the data structure,
     * and so that an implementation can copy CEs at runtime without growing a destination buffer.
     *
     * 31 is the largest value of the 5-bit length field (bits 12..8)
     * documented for EXPANSION32_TAG and EXPANSION_TAG.
     */
    static const int32_t MAX_EXPANSION_LENGTH = 31;
    /**
     * Largest value (19 bits, all set) of the index field in bits 31..13,
     * as written by makeCE32FromTagIndexAndLength() and makeCE32FromTagAndIndex().
     */
    static const int32_t MAX_INDEX = 0x7ffff;
286fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
    // Flags for CONTRACTION_TAG CE32s (bits 10..8, see the CONTRACTION_TAG description).

    /**
     * Set if there is no match for the single (no-suffix) character itself.
     * This is only possible if there is a prefix.
     * In this case, discontiguous contraction matching cannot add combining marks
     * starting from an empty suffix.
     * The default CE32 is used anyway if there is no suffix match.
     */
    static const uint32_t CONTRACT_SINGLE_CP_NO_MATCH = 0x100;  // bit 8
    /** Set if the first character of every contraction suffix has lccc!=0. */
    static const uint32_t CONTRACT_NEXT_CCC = 0x200;  // bit 9
    /** Set if any contraction suffix ends with lccc!=0. */
    static const uint32_t CONTRACT_TRAILING_CCC = 0x400;  // bit 10

    /** For HANGUL_TAG: None of its Jamo CE32s isSpecialCE32(). */
    static const uint32_t HANGUL_NO_SPECIAL_JAMO = 0x100;  // bit 8

    // Values of bits 9..8 of a LEAD_SURROGATE_TAG CE32 (see the LEAD_SURROGATE_TAG description).
    /** Bits 9..8 = 0: All associated supplementary code points are unassigned-implicit. */
    static const uint32_t LEAD_ALL_UNASSIGNED = 0;
    /** Bits 9..8 = 1: All associated supplementary code points fall back to the base data. */
    static const uint32_t LEAD_ALL_FALLBACK = 0x100;
    /** Bits 9..8 = 2: Look up the data for the supplementary code point. */
    static const uint32_t LEAD_MIXED = 0x200;
    /** Selects bits 9..8. */
    static const uint32_t LEAD_TYPE_MASK = 0x300;
307fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
308fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static uint32_t makeLongPrimaryCE32(uint32_t p) { return p | LONG_PRIMARY_CE32_LOW_BYTE; }
309fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
310fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /** Turns the long-primary CE32 into a primary weight pppppp00. */
311fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static inline uint32_t primaryFromLongPrimaryCE32(uint32_t ce32) {
312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return ce32 & 0xffffff00;
313fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static inline int64_t ceFromLongPrimaryCE32(uint32_t ce32) {
315fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return ((int64_t)(ce32 & 0xffffff00) << 32) | COMMON_SEC_AND_TER_CE;
316fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
317fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
318fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static uint32_t makeLongSecondaryCE32(uint32_t lower32) {
319fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return lower32 | SPECIAL_CE32_LOW_BYTE | LONG_SECONDARY_TAG;
320fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
321fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static inline int64_t ceFromLongSecondaryCE32(uint32_t ce32) {
322fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return ce32 & 0xffffff00;
323fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
324fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
325fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /** Makes a special CE32 with tag, index and length. */
326fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static uint32_t makeCE32FromTagIndexAndLength(int32_t tag, int32_t index, int32_t length) {
327fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return (index << 13) | (length << 8) | SPECIAL_CE32_LOW_BYTE | tag;
328fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
329fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /** Makes a special CE32 with only tag and index. */
330fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static uint32_t makeCE32FromTagAndIndex(int32_t tag, int32_t index) {
331fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return (index << 13) | SPECIAL_CE32_LOW_BYTE | tag;
332fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
333fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
334fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static inline UBool isSpecialCE32(uint32_t ce32) {
335fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return (ce32 & 0xff) >= SPECIAL_CE32_LOW_BYTE;
336fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
337fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
338fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static inline int32_t tagFromCE32(uint32_t ce32) {
339fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return (int32_t)(ce32 & 0xf);
340fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
341fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
342fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static inline UBool hasCE32Tag(uint32_t ce32, int32_t tag) {
343fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return isSpecialCE32(ce32) && tagFromCE32(ce32) == tag;
344fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
345fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
346fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static inline UBool isLongPrimaryCE32(uint32_t ce32) {
347fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return hasCE32Tag(ce32, LONG_PRIMARY_TAG);
348fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
349fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
350fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static UBool isSimpleOrLongCE32(uint32_t ce32) {
351fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return !isSpecialCE32(ce32) ||
352fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                tagFromCE32(ce32) == LONG_PRIMARY_TAG ||
353fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                tagFromCE32(ce32) == LONG_SECONDARY_TAG;
354fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
355fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
356fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
357fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @return TRUE if the ce32 yields one or more CEs without further data lookups
358fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
359fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static UBool isSelfContainedCE32(uint32_t ce32) {
360fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return !isSpecialCE32(ce32) ||
361fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                tagFromCE32(ce32) == LONG_PRIMARY_TAG ||
362fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                tagFromCE32(ce32) == LONG_SECONDARY_TAG ||
363fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                tagFromCE32(ce32) == LATIN_EXPANSION_TAG;
364fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
365fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
366fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static inline UBool isPrefixCE32(uint32_t ce32) {
367fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return hasCE32Tag(ce32, PREFIX_TAG);
368fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
369fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
370fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static inline UBool isContractionCE32(uint32_t ce32) {
371fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return hasCE32Tag(ce32, CONTRACTION_TAG);
372fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
373fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
374fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static inline UBool ce32HasContext(uint32_t ce32) {
375fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return isSpecialCE32(ce32) &&
376fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                (tagFromCE32(ce32) == PREFIX_TAG ||
377fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                tagFromCE32(ce32) == CONTRACTION_TAG);
378fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
379fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
380fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
381fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Get the first of the two Latin-expansion CEs encoded in ce32.
382fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @see LATIN_EXPANSION_TAG
383fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
384fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static inline int64_t latinCE0FromCE32(uint32_t ce32) {
385fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return ((int64_t)(ce32 & 0xff000000) << 32) | COMMON_SECONDARY_CE | ((ce32 & 0xff0000) >> 8);
386fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
387fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
388fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
389fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Get the second of the two Latin-expansion CEs encoded in ce32.
390fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @see LATIN_EXPANSION_TAG
391fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
392fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static inline int64_t latinCE1FromCE32(uint32_t ce32) {
393fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return ((ce32 & 0xff00) << 16) | COMMON_TERTIARY_CE;
394fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
395fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
/**
 * Returns the data index from a special CE32:
 * the 19 bits above bit 12 (ce32 shifted right by 13).
 */
static inline int32_t indexFromCE32(uint32_t ce32) {
    return (int32_t)(ce32 >> 13);
}
402fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
/**
 * Returns the data length from a ce32:
 * the 5-bit field in bits 12..8, so a value in 0..31.
 */
static inline int32_t lengthFromCE32(uint32_t ce32) {
    return (int32_t)((ce32 >> 8) & 31);
}
409fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
/**
 * Returns the digit value from a DIGIT_TAG ce32:
 * the 4-bit field in bits 11..8, so a value in 0..15.
 * The tag itself is not checked here.
 */
static inline char digitFromCE32(uint32_t ce32) {
    return (char)((ce32 >> 8) & 0xf);
}
416fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
/**
 * Returns a 64-bit CE from a simple CE32 (not special).
 *
 * The upper 16 bits of ce32 become the top 16 bits of the CE,
 * bits 15..8 move to bits 31..24, and the low byte moves to bits 15..8.
 * The caller must ensure that ce32 is not special; this is not checked.
 */
static inline int64_t ceFromSimpleCE32(uint32_t ce32) {
    // normal form ppppsstt -> pppp0000ss00tt00
    // assert (ce32 & 0xff) < SPECIAL_CE32_LOW_BYTE
    // Shift the primary in uint64_t: with bit 31 set, a signed 64-bit
    // left shift by 32 would overflow.
    return (int64_t)((uint64_t)(ce32 & 0xffff0000) << 32) |
            ((ce32 & 0xff00) << 16) | ((ce32 & 0xff) << 8);
}
423fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
424fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /** Returns a 64-bit CE from a simple/long-primary/long-secondary CE32. */
425fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static inline int64_t ceFromCE32(uint32_t ce32) {
426fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t tertiary = ce32 & 0xff;
427fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(tertiary < SPECIAL_CE32_LOW_BYTE) {
428fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // normal form ppppsstt -> pppp0000ss00tt00
429fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return ((int64_t)(ce32 & 0xffff0000) << 32) | ((ce32 & 0xff00) << 16) | (tertiary << 8);
430fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
431fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ce32 -= tertiary;
432fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if((tertiary & 0xf) == LONG_PRIMARY_TAG) {
433fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // long-primary form ppppppC1 -> pppppp00050000500
434fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return ((int64_t)ce32 << 32) | COMMON_SEC_AND_TER_CE;
435fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else {
436fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // long-secondary form ssssttC2 -> 00000000sssstt00
437fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // assert (tertiary & 0xf) == LONG_SECONDARY_TAG
438fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return ce32;
439fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
440fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
441fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
442fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
443fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /** Creates a CE from a primary weight. */
444fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static inline int64_t makeCE(uint32_t p) {
445fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return ((int64_t)p << 32) | COMMON_SEC_AND_TER_CE;
446fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
/**
 * Creates a CE from a primary weight,
 * 16-bit secondary/tertiary weights, and a 2-bit quaternary.
 *
 * p fills the upper 32 bits, s is shifted to bits 31..16, t is OR-ed into
 * the low 16 bits, and q is shifted to bits 7..6. None of the arguments
 * is masked, so each must already fit its field.
 */
static inline int64_t makeCE(uint32_t p, uint32_t s, uint32_t t, uint32_t q) {
    // Shift in uint64_t: with bit 31 of p set, a signed 64-bit
    // left shift by 32 would overflow.
    return (int64_t)((uint64_t)p << 32) | (s << 16) | t | (q << 6);
}
454fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
455fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
456fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Increments a 2-byte primary by a code point offset.
457fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
458fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static uint32_t incTwoBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible,
459fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                              int32_t offset);
460fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
461fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
462fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Increments a 3-byte primary by a code point offset.
463fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
464fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static uint32_t incThreeBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible,
465fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                                int32_t offset);
466fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
467fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
468fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Decrements a 2-byte primary by one range step (1..0x7f).
469fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
470fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static uint32_t decTwoBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step);
471fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
472fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
473fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Decrements a 3-byte primary by one range step (1..0x7f).
474fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
475fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static uint32_t decThreeBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step);
476fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
477fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
478fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Computes a 3-byte primary for c's OFFSET_TAG data "CE".
479fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
480fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static uint32_t getThreeBytePrimaryForOffsetData(UChar32 c, int64_t dataCE);
481fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
482fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
483fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Returns the unassigned-character implicit primary weight for any valid code point c.
484fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
485fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static uint32_t unassignedPrimaryFromCodePoint(UChar32 c);
486fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
487fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    static inline int64_t unassignedCEFromCodePoint(UChar32 c) {
488fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return makeCE(unassignedPrimaryFromCodePoint(c));
489fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
490fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
491fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprivate:
492fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    Collation();  // No instantiation.
493fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius};
494fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
495fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_END
496fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
497fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif  // !UCONFIG_NO_COLLATION
498fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif  // __COLLATION_H__
499