/*
******************************************************************************
*
*   Copyright (C) 2001-2008, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  utrie.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2001nov08
*   created by: Markus W. Scherer
*/

17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifndef __UTRIE_H__
18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define __UTRIE_H__
19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h"
21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "udataswp.h"
22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CDECL_BEGIN
24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
/**
 * \file
 *
 * This is a common implementation of a "folded" trie.
 * It is a kind of compressed, serializable table of 16- or 32-bit values associated with
 * Unicode code points (0..0x10ffff).
 *
 * This implementation is optimized for getting values while walking forward
 * through a UTF-16 string.
 * Therefore, the simplest and fastest access macros are the
 * _FROM_LEAD() and _FROM_OFFSET_TRAIL() macros.
 *
 * The _FROM_BMP() macros are a little more complicated; they get values
 * even for lead surrogate code _points_, while the _FROM_LEAD() macros
 * get special "folded" values for lead surrogate code _units_ if
 * there is relevant data associated with them.
 * From such a folded value, an offset needs to be extracted to supply
 * to the _FROM_OFFSET_TRAIL() macros.
 *
 * Most of the more complex (and more convenient) functions/macros call a callback function
 * to get that offset from the folded value for a lead surrogate unit.
 */

/**
 * Trie constants, defining shift widths, index array lengths, etc.
 */
enum {
    /** Shift size for shifting right the input index. 1..9 */
    UTRIE_SHIFT = 5,

    /** Number of data values in a stage 2 (data array) block. 2, 4, 8, .., 0x200 */
    UTRIE_DATA_BLOCK_LENGTH = 1 << UTRIE_SHIFT,

    /** Mask for getting the lower bits from the input index. */
    UTRIE_MASK = UTRIE_DATA_BLOCK_LENGTH - 1,

    /**
     * Lead surrogate code points' index displacement in the index array.
     * 0x10000-0xd800=0x2800
     */
    UTRIE_LEAD_INDEX_DISP = 0x2800 >> UTRIE_SHIFT,

    /**
     * Shift size for shifting left the index array values.
     * Increases possible data size with 16-bit index values at the cost
     * of compactability.
     * This requires blocks of stage 2 data to be aligned by UTRIE_DATA_GRANULARITY.
     * 0..UTRIE_SHIFT
     */
    UTRIE_INDEX_SHIFT = 2,

    /** The alignment size of a stage 2 data block. Also the granularity for compaction. */
    UTRIE_DATA_GRANULARITY = 1 << UTRIE_INDEX_SHIFT,

    /** Number of bits of a trail surrogate that are used in index table lookups. */
    UTRIE_SURROGATE_BLOCK_BITS = 10 - UTRIE_SHIFT,

    /**
     * Number of index (stage 1) entries per lead surrogate.
     * Same as number of index entries for 1024 trail surrogates,
     * ==0x400>>UTRIE_SHIFT
     */
    UTRIE_SURROGATE_BLOCK_COUNT = (1 << UTRIE_SURROGATE_BLOCK_BITS),

    /** Length of the BMP portion of the index (stage 1) array. */
    UTRIE_BMP_INDEX_LENGTH = 0x10000 >> UTRIE_SHIFT
};

/**
 * Length of the index (stage 1) array before folding.
 * Maximum number of Unicode code points (0x110000) shifted right by UTRIE_SHIFT.
 */
#define UTRIE_MAX_INDEX_LENGTH (0x110000>>UTRIE_SHIFT)

/**
 * Maximum length of the runtime data (stage 2) array.
 * Limited by 16-bit index values that are left-shifted by UTRIE_INDEX_SHIFT.
 */
#define UTRIE_MAX_DATA_LENGTH (0x10000<<UTRIE_INDEX_SHIFT)

/**
 * Maximum length of the build-time data (stage 2) array.
 * The maximum length is 0x110000+UTRIE_DATA_BLOCK_LENGTH+0x400.
 * (Number of Unicode code points + one all-initial-value block +
 *  possible duplicate entries for 1024 lead surrogates.)
 */
#define UTRIE_MAX_BUILD_TIME_DATA_LENGTH (0x110000+UTRIE_DATA_BLOCK_LENGTH+0x400)

/**
 * Number of bytes for a dummy trie.
 * A dummy trie is an empty runtime trie, used when a real data trie cannot
 * be loaded.
 * The number of bytes works for Latin-1-linear tries with 32-bit data
 * (worst case).
 *
 * Calculation:
 *   BMP index + 1 index block for lead surrogate code points +
 *   Latin-1-linear array + 1 data block for lead surrogate code points
 *
 * Index entries are counted at 2 bytes each, data entries at 4 bytes each.
 *
 * Latin-1: if(UTRIE_SHIFT<=8) { 256 } else { included in first data block }
 *
 * @see utrie_unserializeDummy
 */
#define UTRIE_DUMMY_SIZE \
    ((UTRIE_BMP_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT)*2+ \
     (UTRIE_SHIFT<=8?256:UTRIE_DATA_BLOCK_LENGTH)*4+ \
     UTRIE_DATA_BLOCK_LENGTH*4)

130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Runtime UTrie callback function.
132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Extract from a lead surrogate's data the
133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * index array offset of the indexes for that lead surrogate.
134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param data data value for a surrogate from the trie, including the folding offset
136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return offset>=UTRIE_BMP_INDEX_LENGTH, or 0 if there is no data for the lead surrogate
137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef int32_t U_CALLCONV
139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UTrieGetFoldingOffset(uint32_t data);
140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Run-time Trie structure.
143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Either the data table is 16 bits wide and accessed via the index
145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * pointer, with each index item increased by indexLength;
146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * in this case, data32==NULL.
147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Or the data table is 32 bits wide and accessed via the data32 pointer.
149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)struct UTrie {
151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const uint16_t *index;
152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const uint32_t *data32; /* NULL if 16b data is used via index */
153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * This function is not used in _FROM_LEAD, _FROM_BMP, and _FROM_OFFSET_TRAIL macros.
156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * If convenience macros like _GET16 or _NEXT32 are used, this function must be set.
157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     *
158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * utrie_unserialize() sets a default function which simply returns
159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * the lead surrogate's value itself - which is the inverse of the default
160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * folding function used by utrie_serialize().
161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     *
162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * @see UTrieGetFoldingOffset
163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UTrieGetFoldingOffset *getFoldingOffset;
165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t indexLength, dataLength;
167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint32_t initialValue;
168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool isLatin1Linear;
169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)};
170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
/*
 * NOTE(review): presumably utrie2.h declares the same typedef; the guard
 * skips it here when that header was included first - confirm.
 */
#ifndef __UTRIE2_H__
typedef struct UTrie UTrie;
#endif

/**
 * Internal trie getter from an offset (0 if c16 is a BMP/lead unit) and a 16-bit unit.
 *
 * Looks up index[offset+(c16>>UTRIE_SHIFT)], shifts that entry left by
 * UTRIE_INDEX_SHIFT to get the start of the data block, and adds the low
 * UTRIE_MASK bits of c16 to select the value inside the block.
 *
 * @param trie   pointer to the trie structure
 * @param data   name of the member to read values from (index or data32)
 * @param offset offset into the index array
 * @param c16    16-bit unit; evaluated more than once
 * @return the value stored in the selected data array slot
 */
#define _UTRIE_GET_RAW(trie, data, offset, c16) \
    (trie)->data[ \
        ((int32_t)((trie)->index[(offset)+((c16)>>UTRIE_SHIFT)])<<UTRIE_INDEX_SHIFT)+ \
        ((c16)&UTRIE_MASK) \
    ]

/**
 * Internal trie getter from a pair of surrogates.
 *
 * First reads the value for the lead surrogate unit and passes it to
 * trie->getFoldingOffset(). A positive offset selects the folded index block
 * that is then used with the low 10 bits of c2; otherwise the result is
 * trie->initialValue.
 *
 * @param trie       pointer to the trie structure
 * @param data       name of the member to read values from (index or data32)
 * @param c          lead surrogate unit
 * @param c2         trail surrogate (only its low 10 bits are used)
 * @param result     variable that receives the lookup result; also used as
 *                   scratch for the lead surrogate's value
 * @param resultType type that initialValue is cast to
 */
#define _UTRIE_GET_FROM_PAIR(trie, data, c, c2, result, resultType) { \
    int32_t __offset; \
\
    /* get data for lead surrogate */ \
    (result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \
    __offset=(trie)->getFoldingOffset(result); \
\
    /* get the real data from the folded lead/trail units */ \
    if(__offset>0) { \
        (result)=_UTRIE_GET_RAW((trie), data, __offset, (c2)&0x3ff); \
    } else { \
        (result)=(resultType)((trie)->initialValue); \
    } \
}

/**
 * Internal trie getter from a BMP code point, treating a lead surrogate as a normal code point.
 *
 * For lead surrogates (0xd800..0xdbff) the index lookup is displaced by
 * UTRIE_LEAD_INDEX_DISP; all other units use offset 0.
 *
 * Expands to a plain expression with no trailing semicolon, so it can be
 * used inside larger expressions as well as in assignments.
 * c16 is evaluated more than once.
 */
#define _UTRIE_GET_FROM_BMP(trie, data, c16) \
    _UTRIE_GET_RAW(trie, data, 0xd800<=(c16) && (c16)<=0xdbff ? UTRIE_LEAD_INDEX_DISP : 0, c16)

/**
 * Internal trie getter from a code point.
 * Could be faster(?) but longer with
 *   if((c32)<=0xd7ff) { (result)=_UTRIE_GET_RAW(trie, data, 0, c32); }
 *
 * BMP code points go through _UTRIE_GET_FROM_BMP, supplementary code points
 * (up to 0x10ffff) through _UTRIE_GET_FROM_PAIR with the computed lead
 * surrogate, and anything else yields trie->initialValue.
 *
 * The expansion is a single braced block, like the other statement macros
 * in this file. c32 is evaluated more than once.
 *
 * @param trie       pointer to the trie structure
 * @param data       name of the member to read values from (index or data32)
 * @param c32        input code point
 * @param result     variable that receives the lookup result
 * @param resultType type that initialValue is cast to
 */
#define _UTRIE_GET(trie, data, c32, result, resultType) { \
    if((uint32_t)(c32)<=0xffff) { \
        /* BMP code points */ \
        (result)=_UTRIE_GET_FROM_BMP(trie, data, c32); \
    } else if((uint32_t)(c32)<=0x10ffff) { \
        /* supplementary code point */ \
        UChar __lead16=UTF16_LEAD(c32); \
        _UTRIE_GET_FROM_PAIR(trie, data, __lead16, c32, result, resultType); \
    } else { \
        /* out of range */ \
        (result)=(resultType)((trie)->initialValue); \
    } \
}

/**
 * Internal next-post-increment: get the next code point (c, c2) and its data.
 *
 * Reads one unit from src and advances src. If that unit is a lead surrogate
 * followed (before limit) by a trail surrogate, both are consumed and the
 * pair is looked up; an unpaired lead surrogate is looked up as a code point
 * using UTRIE_LEAD_INDEX_DISP. c2 is set to 0 when no trail surrogate was
 * consumed.
 *
 * @param trie       pointer to the trie structure
 * @param data       name of the member to read values from (index or data32)
 * @param src        pointer into the text; advanced past what was read
 * @param limit      pointer that src is compared against before a trail surrogate is read
 * @param c          receives the first (or only) unit
 * @param c2         receives the trail surrogate, or 0
 * @param result     variable that receives the lookup result
 * @param resultType type that initialValue is cast to
 */
#define _UTRIE_NEXT(trie, data, src, limit, c, c2, result, resultType) { \
    (c)=*(src)++; \
    if(!UTF_IS_LEAD(c)) { \
        (c2)=0; \
        (result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \
    } else if((src)!=(limit) && UTF_IS_TRAIL((c2)=*(src))) { \
        ++(src); \
        _UTRIE_GET_FROM_PAIR((trie), data, (c), (c2), (result), resultType); \
    } else { \
        /* unpaired lead surrogate code point */ \
        (c2)=0; \
        (result)=_UTRIE_GET_RAW((trie), data, UTRIE_LEAD_INDEX_DISP, (c)); \
    } \
}

/**
 * Internal previous: get the previous code point (c, c2) and its data.
 *
 * Moves src back by one unit and reads it. If that unit is a trail surrogate
 * preceded (after start) by a lead surrogate, src moves back once more, c and
 * c2 are swapped into lead/trail order, and the pair is looked up.
 * An unpaired trail surrogate is looked up like any other unit; an unpaired
 * lead surrogate is looked up as a code point using UTRIE_LEAD_INDEX_DISP.
 * c2 is set to 0 when no surrogate pair was read.
 *
 * @param trie       pointer to the trie structure
 * @param data       name of the member to read values from (index or data32)
 * @param start      pointer that src is compared against before a preceding unit is read
 * @param src        pointer into the text; moved back before what was read
 * @param c          receives the first (or only) unit
 * @param c2         receives the trail surrogate, or 0
 * @param result     variable that receives the lookup result; also used as
 *                   scratch while swapping c and c2
 * @param resultType type that initialValue is cast to
 */
#define _UTRIE_PREVIOUS(trie, data, start, src, c, c2, result, resultType) { \
    (c)=*--(src); \
    if(!UTF_IS_SURROGATE(c)) { \
        (c2)=0; \
        (result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \
    } else if(!UTF_IS_SURROGATE_FIRST(c)) { \
        /* trail surrogate */ \
        if((start)!=(src) && UTF_IS_LEAD((c2)=*((src)-1))) { \
            --(src); \
            (result)=(c); (c)=(c2); (c2)=(UChar)(result); /* swap c, c2 */ \
            _UTRIE_GET_FROM_PAIR((trie), data, (c), (c2), (result), resultType); \
        } else { \
            /* unpaired trail surrogate code point */ \
            (c2)=0; \
            (result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \
        } \
    } else { \
        /* unpaired lead surrogate code point */ \
        (c2)=0; \
        (result)=_UTRIE_GET_RAW((trie), data, UTRIE_LEAD_INDEX_DISP, (c)); \
    } \
}

/* Public UTrie API ---------------------------------------------------------*/

/**
 * Get a pointer to the contiguous part of the data array
 * for the Latin-1 range (U+0000..U+00ff).
 * Must be used only if the Latin-1 range is in fact linear
 * (trie->isLatin1Linear).
 *
 * The pointer is index + indexLength + UTRIE_DATA_BLOCK_LENGTH, that is,
 * one data block past the position where the index array ends.
 *
 * @param trie (const UTrie *, in) a pointer to the runtime trie structure
 * @return (const uint16_t *) pointer to values for Latin-1 code points
 */
#define UTRIE_GET16_LATIN1(trie) ((trie)->index+(trie)->indexLength+UTRIE_DATA_BLOCK_LENGTH)

/**
 * Get a pointer to the contiguous part of the data array
 * for the Latin-1 range (U+0000..U+00ff).
 * Must be used only if the Latin-1 range is in fact linear
 * (trie->isLatin1Linear).
 *
 * The pointer is data32 + UTRIE_DATA_BLOCK_LENGTH, that is,
 * one data block past the start of the 32-bit data array.
 *
 * @param trie (const UTrie *, in) a pointer to the runtime trie structure
 * @return (const uint32_t *) pointer to values for Latin-1 code points
 */
#define UTRIE_GET32_LATIN1(trie) ((trie)->data32+UTRIE_DATA_BLOCK_LENGTH)

/**
 * Get a 16-bit trie value from a BMP code point (UChar, <=U+ffff).
 * c16 may be a lead surrogate, which may have a value including a folding offset.
 *
 * The value is read through the index pointer with index offset 0.
 *
 * @param trie (const UTrie *, in) a pointer to the runtime trie structure
 * @param c16 (UChar, in) the input BMP code point
 * @return (uint16_t) trie lookup result
 */
#define UTRIE_GET16_FROM_LEAD(trie, c16) _UTRIE_GET_RAW(trie, index, 0, c16)

/**
 * Get a 32-bit trie value from a BMP code point (UChar, <=U+ffff).
 * c16 may be a lead surrogate, which may have a value including a folding offset.
 *
 * The value is read through the data32 pointer with index offset 0.
 *
 * @param trie (const UTrie *, in) a pointer to the runtime trie structure
 * @param c16 (UChar, in) the input BMP code point
 * @return (uint32_t) trie lookup result
 */
#define UTRIE_GET32_FROM_LEAD(trie, c16) _UTRIE_GET_RAW(trie, data32, 0, c16)

/**
 * Get a 16-bit trie value from a BMP code point (UChar, <=U+ffff).
 * Even lead surrogate code points are treated as normal code points,
 * with unfolded values that may differ from _FROM_LEAD() macro results for them.
 *
 * The value is read through the index pointer.
 *
 * @param trie (const UTrie *, in) a pointer to the runtime trie structure
 * @param c16 (UChar, in) the input BMP code point
 * @return (uint16_t) trie lookup result
 */
#define UTRIE_GET16_FROM_BMP(trie, c16) _UTRIE_GET_FROM_BMP(trie, index, c16)

/**
 * Get a 32-bit trie value from a BMP code point (UChar, <=U+ffff).
 * Even lead surrogate code points are treated as normal code points,
 * with unfolded values that may differ from _FROM_LEAD() macro results for them.
 *
 * The value is read through the data32 pointer.
 *
 * @param trie (const UTrie *, in) a pointer to the runtime trie structure
 * @param c16 (UChar, in) the input BMP code point
 * @return (uint32_t) trie lookup result
 */
#define UTRIE_GET32_FROM_BMP(trie, c16) _UTRIE_GET_FROM_BMP(trie, data32, c16)

326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get a 16-bit trie value from a code point.
328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Even lead surrogate code points are treated as normal code points,
329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * with unfolded values that may differ from _FROM_LEAD() macro results for them.
330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
 * The lookup value is delivered through the result argument.
 * Forwards to _UTRIE_GET(), passing `index` as its second argument (the 32-bit
 * variant passes `data32` there) and uint16_t as its final, type argument.
 * NOTE(review): function-like macro - how often each argument is evaluated
 * depends on _UTRIE_GET(); confirm before passing expressions with side effects.
 *
331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param trie (const UTrie *, in) a pointer to the runtime trie structure
332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param c32 (UChar32, in) the input code point
333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param result (uint16_t, out) uint16_t variable for the trie lookup result
334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UTRIE_GET16(trie, c32, result) _UTRIE_GET(trie, index, c32, result, uint16_t)
336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get a 32-bit trie value from a code point.
339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Even lead surrogate code points are treated as normal code points,
340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * with unfolded values that may differ from _FROM_LEAD() macro results for them.
341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
 * The lookup value is delivered through the result argument.
 * Forwards to _UTRIE_GET(), passing `data32` as its second argument (the 16-bit
 * variant passes `index` there) and uint32_t as its final, type argument.
 * NOTE(review): function-like macro - how often each argument is evaluated
 * depends on _UTRIE_GET(); confirm before passing expressions with side effects.
 *
342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param trie (const UTrie *, in) a pointer to the runtime trie structure
343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param c32 (UChar32, in) the input code point
344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param result (uint32_t, out) uint32_t variable for the trie lookup result
345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UTRIE_GET32(trie, c32, result) _UTRIE_GET(trie, data32, c32, result, uint32_t)
347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get the next code point (c, c2), post-increment src,
350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and get a 16-bit value from the trie.
351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
 * All outputs (c, c2, result) and the advanced src are delivered through the
 * macro arguments, which must therefore be modifiable variables.
 * Forwards to _UTRIE_NEXT(), passing `index` as its second argument (the 32-bit
 * variant passes `data32` there) and uint16_t as its final, type argument.
 *
352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param trie (const UTrie *, in) a pointer to the runtime trie structure
353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param src (const UChar *, in/out) the source text pointer
354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param limit (const UChar *, in) the limit pointer for the text, or NULL
355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param c (UChar, out) variable for the BMP or lead code unit
356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param c2 (UChar, out) variable for 0 or the trail code unit
357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param result (uint16_t, out) uint16_t variable for the trie lookup result
358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UTRIE_NEXT16(trie, src, limit, c, c2, result) _UTRIE_NEXT(trie, index, src, limit, c, c2, result, uint16_t)
360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get the next code point (c, c2), post-increment src,
363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and get a 32-bit value from the trie.
364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
 * All outputs (c, c2, result) and the advanced src are delivered through the
 * macro arguments, which must therefore be modifiable variables.
 * Forwards to _UTRIE_NEXT(), passing `data32` as its second argument (the 16-bit
 * variant passes `index` there) and uint32_t as its final, type argument.
 *
365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param trie (const UTrie *, in) a pointer to the runtime trie structure
366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param src (const UChar *, in/out) the source text pointer
367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param limit (const UChar *, in) the limit pointer for the text, or NULL
368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param c (UChar, out) variable for the BMP or lead code unit
369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param c2 (UChar, out) variable for 0 or the trail code unit
370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param result (uint32_t, out) uint32_t variable for the trie lookup result
371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UTRIE_NEXT32(trie, src, limit, c, c2, result) _UTRIE_NEXT(trie, data32, src, limit, c, c2, result, uint32_t)
373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get the previous code point (c, c2), pre-decrement src,
376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and get a 16-bit value from the trie.
377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
 * All outputs (c, c2, result) and the moved-back src are delivered through the
 * macro arguments, which must therefore be modifiable variables.
 * Forwards to _UTRIE_PREVIOUS(), passing `index` as its second argument (the
 * 32-bit variant passes `data32` there) and uint16_t as its final, type argument.
 *
378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param trie (const UTrie *, in) a pointer to the runtime trie structure
379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param start (const UChar *, in) the start pointer for the text, or NULL
380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param src (const UChar *, in/out) the source text pointer
381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param c (UChar, out) variable for the BMP or lead code unit
382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param c2 (UChar, out) variable for 0 or the trail code unit
383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param result (uint16_t, out) uint16_t variable for the trie lookup result
384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UTRIE_PREVIOUS16(trie, start, src, c, c2, result) _UTRIE_PREVIOUS(trie, index, start, src, c, c2, result, uint16_t)
386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get the previous code point (c, c2), pre-decrement src,
389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and get a 32-bit value from the trie.
390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
 * All outputs (c, c2, result) and the moved-back src are delivered through the
 * macro arguments, which must therefore be modifiable variables.
 * Forwards to _UTRIE_PREVIOUS(), passing `data32` as its second argument (the
 * 16-bit variant passes `index` there) and uint32_t as its final, type argument.
 *
391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param trie (const UTrie *, in) a pointer to the runtime trie structure
392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param start (const UChar *, in) the start pointer for the text, or NULL
393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param src (const UChar *, in/out) the source text pointer
394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param c (UChar, out) variable for the BMP or lead code unit
395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param c2 (UChar, out) variable for 0 or the trail code unit
396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param result (uint32_t, out) uint32_t variable for the trie lookup result
397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UTRIE_PREVIOUS32(trie, start, src, c, c2, result) _UTRIE_PREVIOUS(trie, data32, start, src, c, c2, result, uint32_t)
399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get a 16-bit trie value from a pair of surrogates.
402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
 * The lookup value is delivered through the result argument.
 * Forwards to _UTRIE_GET_FROM_PAIR(), passing `index` as its second argument
 * (the 32-bit variant passes `data32` there) and uint16_t as its final, type
 * argument.
 * NOTE(review): what happens when c / c2 are not a valid lead / trail surrogate
 * is not documented here - confirm against _UTRIE_GET_FROM_PAIR().
 *
403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param trie (const UTrie *, in) a pointer to the runtime trie structure
404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param c (UChar, in) a lead surrogate
405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param c2 (UChar, in) a trail surrogate
406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param result (uint16_t, out) uint16_t variable for the trie lookup result
407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UTRIE_GET16_FROM_PAIR(trie, c, c2, result) _UTRIE_GET_FROM_PAIR(trie, index, c, c2, result, uint16_t)
409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get a 32-bit trie value from a pair of surrogates.
412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
 * The lookup value is delivered through the result argument.
 * Forwards to _UTRIE_GET_FROM_PAIR(), passing `data32` as its second argument
 * (the 16-bit variant passes `index` there) and uint32_t as its final, type
 * argument.
 * NOTE(review): what happens when c / c2 are not a valid lead / trail surrogate
 * is not documented here - confirm against _UTRIE_GET_FROM_PAIR().
 *
413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param trie (const UTrie *, in) a pointer to the runtime trie structure
414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param c (UChar, in) a lead surrogate
415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param c2 (UChar, in) a trail surrogate
416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param result (uint32_t, out) uint32_t variable for the trie lookup result
417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UTRIE_GET32_FROM_PAIR(trie, c, c2, result) _UTRIE_GET_FROM_PAIR(trie, data32, c, c2, result, uint32_t)
419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get a 16-bit trie value from a folding offset (from the value of a lead surrogate)
422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and a trail surrogate.
423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
 * Forwards to _UTRIE_GET_RAW() with `index` as its second argument, the given
 * offset, and c2 masked with 0x3ff; the masking is done here, so c2 need not be
 * pre-masked by the caller.
 *
424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param trie (const UTrie *, in) a pointer to the runtime trie structure
425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param offset (int32_t, in) the folding offset from the value of a lead surrogate
426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param c2 (UChar, in) a trail surrogate (only the 10 low bits are significant)
427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return (uint16_t) trie lookup result
428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UTRIE_GET16_FROM_OFFSET_TRAIL(trie, offset, c2) _UTRIE_GET_RAW(trie, index, offset, (c2)&0x3ff)
430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get a 32-bit trie value from a folding offset (from the value of a lead surrogate)
433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and a trail surrogate.
434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
 * Forwards to _UTRIE_GET_RAW() with `data32` as its second argument, the given
 * offset, and c2 masked with 0x3ff; the masking is done here, so c2 need not be
 * pre-masked by the caller.
 *
435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param trie (const UTrie *, in) a pointer to the runtime trie structure
436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param offset (int32_t, in) the folding offset from the value of a lead surrogate
437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param c2 (UChar, in) a trail surrogate (only the 10 low bits are significant)
438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return (uint32_t) trie lookup result
439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UTRIE_GET32_FROM_OFFSET_TRAIL(trie, offset, c2) _UTRIE_GET_RAW(trie, data32, offset, (c2)&0x3ff)
441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* enumeration callback types */
443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Callback from utrie_enum(), extracts a uint32_t value from a
446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * trie value. This value will be passed on to the UTrieEnumRange function.
447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
 * This typedef names a function type, not a pointer type; utrie_enum() takes
 * a `UTrieEnumValue *`.
 *
448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param context an opaque pointer, as passed into utrie_enum()
449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param value a value from the trie
450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return the value that is to be passed on to the UTrieEnumRange function
451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef uint32_t U_CALLCONV
453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UTrieEnumValue(const void *context, uint32_t value);
454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Callback from utrie_enum(), is called for each contiguous range
457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * of code points with the same value as retrieved from the trie and
458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * transformed by the UTrieEnumValue function.
459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The callback function can stop the enumeration by returning FALSE.
461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
 * The range is half-open: start is included, limit is excluded.
 * This typedef names a function type, not a pointer type; utrie_enum() takes
 * a `UTrieEnumRange *`.
 *
462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param context an opaque pointer, as passed into utrie_enum()
463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param start the first code point in a contiguous range with value
464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param limit one past the last code point in a contiguous range with value
465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param value the value that is set for all code points in [start..limit[
466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return FALSE to stop the enumeration
467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef UBool U_CALLCONV
469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UTrieEnumRange(const void *context, UChar32 start, UChar32 limit, uint32_t value);
470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Enumerate efficiently all values in a trie.
473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * For each entry in the trie, the value to be delivered is passed through
474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the UTrieEnumValue function.
475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The value is unchanged if that function pointer is NULL.
476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * For each contiguous range of code points with a given value,
478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the UTrieEnumRange function is called.
479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
 * The function returns nothing and takes no error-code parameter, so results
 * reach the caller only through the callbacks and context.
 * NOTE(review): unlike enumValue, NULL is not documented as a valid value for
 * enumRange - confirm against the implementation.
 *
480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param trie a pointer to the runtime trie structure
481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param enumValue a pointer to a function that may transform the trie entry value,
482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *                  or NULL if the values from the trie are to be used directly
483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param enumRange a pointer to a function that is called for each contiguous range
484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *                  of code points with the same value
485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param context an opaque pointer that is passed on to the callback functions
486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2
488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_enum(const UTrie *trie,
489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           UTrieEnumValue *enumValue, UTrieEnumRange *enumRange, const void *context);
490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Unserialize a trie from 32-bit-aligned memory.
493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Inverse of utrie_serialize().
494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Fills the UTrie runtime trie structure with the settings for the trie data.
495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
 * The data pointer is const-qualified, so the serialized bytes themselves are
 * not written through it.
 * NOTE(review): presumably the filled-in UTrie keeps pointing into data, which
 * would then have to outlive the trie - confirm against the implementation.
 * NOTE(review): the return value when *pErrorCode indicates failure is not
 * documented here - confirm.
 *
496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param trie a pointer to the runtime trie structure
497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param data a pointer to 32-bit-aligned memory containing trie data
498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param length the number of bytes available at data
499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param pErrorCode an in/out ICU UErrorCode
500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return the number of bytes at data taken up by the trie data
501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2
503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_unserialize(UTrie *trie, const void *data, int32_t length, UErrorCode *pErrorCode);
504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * "Unserialize" a dummy trie.
507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * A dummy trie is an empty runtime trie, used when a real data trie cannot
508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * be loaded.
509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The input memory is filled so that the trie always returns the initialValue,
511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * or the leadUnitValue for lead surrogate code points.
512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The Latin-1 part is always set up to be linear.
513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param trie a pointer to the runtime trie structure
515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param data a pointer to 32-bit-aligned memory to be filled with the dummy trie data
516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param length the number of bytes available at data (recommended to use UTRIE_DUMMY_SIZE)
517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param initialValue the initial value that is set for all code points
518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param leadUnitValue the value for lead surrogate code _units_ that do not
519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *                      have associated supplementary data
 * @param make16BitTrie NOTE(review): this parameter was not documented;
 *                      presumably TRUE builds a 16-bit dummy trie and FALSE a
 *                      32-bit one - confirm against the implementation
520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param pErrorCode an in/out ICU UErrorCode
 * @return NOTE(review): the return value was not documented; presumably the
 *         number of bytes at data used by the dummy trie, by analogy with
 *         utrie_unserialize() - confirm against the implementation
521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @see UTRIE_DUMMY_SIZE
523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @see utrie_open
524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2
526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_unserializeDummy(UTrie *trie,
527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       void *data, int32_t length,
528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       uint32_t initialValue, uint32_t leadUnitValue,
529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       UBool make16BitTrie,
530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       UErrorCode *pErrorCode);
531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Default implementation for UTrie.getFoldingOffset, set automatically by
534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * utrie_unserialize().
535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Simply returns the lead surrogate's value itself - which is the inverse
536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * of the default folding function used by utrie_serialize().
537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Exported for static const UTrie structures.
538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
 * @param data the trie value of a lead surrogate
 * @return the folding offset; per the description above this default
 *         implementation returns data itself (as int32_t)
 *
539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @see UTrieGetFoldingOffset
540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2
542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_defaultGetFoldingOffset(uint32_t data);
543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Building a trie ----------------------------------------------------------*/
545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Build-time trie structure.
548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Opaque definition, here only to make fillIn parameters possible
549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * for utrie_open() and utrie_clone().
 * Because the definition is meant to be opaque, callers should not depend on
 * the individual fields below.
550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)struct UNewTrie {
552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Index values at build-time are 32 bits wide for easier processing.
554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Bit 31 is set if the data block is used by multiple index values (from utrie_setRange()).
     * Since the element type is int32_t, an entry with bit 31 set reads as a
     * negative number.
     * NOTE(review): the utrie_open() documentation refers to utrie_setRange32();
     * confirm which function name is meant here.
555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t index[UTRIE_MAX_INDEX_LENGTH];
    /* NOTE(review): presumably the build-time data array that utrie_open() either allocates or aliases - confirm. */
557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint32_t *data;
558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
    /* NOTE(review): presumably the leadUnitValue that was passed to utrie_open() - confirm. */
559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint32_t leadUnitValue;
    /* NOTE(review): presumably counted in array entries rather than bytes - confirm. */
560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t indexLength, dataCapacity, dataLength;
    /* NOTE(review): presumably record whether this struct / its data array were allocated
     * by the library rather than supplied via fillIn / aliasData - confirm. */
561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool isAllocated, isDataAllocated;
    /* NOTE(review): isLatin1Linear presumably mirrors utrie_open()'s latin1Linear flag;
     * isCompacted presumably is set once utrie_compact() has run - confirm. */
562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool isLatin1Linear, isCompacted;
563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Map of adjusted indexes, used in utrie_compact().
566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Maps from original indexes to new ones.
567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t map[UTRIE_MAX_BUILD_TIME_DATA_LENGTH>>UTRIE_SHIFT];
569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)};
570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef struct UNewTrie UNewTrie;
572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Build-time trie callback function, used with utrie_serialize().
575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * This function calculates a lead surrogate's value including a folding offset
576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * from the 1024 supplementary code points [start..start+1024[ .
577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * It is U+10000 <= start <= U+10fc00 and (start&0x3ff)==0.
578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The folding offset is provided by the caller.
580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * It is offset=UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023.
581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Instead of the offset itself, n can be stored in 10 bits -
582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * or fewer if it can be assumed that few lead surrogates have associated data.
583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The returned value must be
585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - not zero if and only if there is relevant data
586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *   for the corresponding 1024 supplementary code points
587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - such that UTrie.getFoldingOffset(UNewTrieGetFoldedValue(..., offset))==offset
588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
 * This typedef names a function type, not a pointer type.
 *
 * @param trie the build-time trie (passed non-const)
 * @param start the first of the 1024 supplementary code points covered,
 *              U+10000 <= start <= U+10fc00 with (start&0x3ff)==0
 * @param offset the folding offset supplied by the caller, of the form
 *               UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT
589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return a folded value, or 0 if there is no relevant data for the lead surrogate.
590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef uint32_t U_CALLCONV
592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UNewTrieGetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset);
593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Open a build-time trie structure.
596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The size of the build-time data array is specified to avoid allocating a large
597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * array in all cases. The array itself can also be passed in.
598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Although the trie is never fully expanded to a linear array, especially when
600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * utrie_setRange32() is used, the data array could be large during build time.
601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The maximum length is
602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * UTRIE_MAX_BUILD_TIME_DATA_LENGTH=0x110000+UTRIE_DATA_BLOCK_LENGTH+0x400.
603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * (Number of Unicode code points + one all-initial-value block +
604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *  possible duplicate entries for 1024 lead surrogates.)
605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * (UTRIE_DATA_BLOCK_LENGTH<=0x200 in all cases.)
606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
 * NOTE(review): there is no UErrorCode parameter; how failure (for example an
 * allocation failure or a too-small maxDataLength) is reported - presumably a
 * NULL return - is not documented here; confirm against the implementation.
 * NOTE(review): maxDataLength is presumably counted in uint32_t entries rather
 * than bytes, matching the aliasData element type - confirm.
 *
607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param fillIn a pointer to a UNewTrie structure to be initialized (will not be released), or
608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *               NULL if one is to be allocated
609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param aliasData a pointer to a data array to be used (will not be released), or
610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *                  NULL if one is to be allocated
611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param maxDataLength the capacity of aliasData (if not NULL) or
612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *                      the length of the data array to be allocated
613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param initialValue the initial value that is set for all code points
614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param leadUnitValue the value for lead surrogate code _units_ that do not
615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *                      have associated supplementary data
616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param latin1Linear a flag indicating whether the Latin-1 range is to be allocated and
617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *                     kept in a linear, contiguous part of the data array
618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return a pointer to the initialized fillIn or the allocated and initialized new UNewTrie
619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UNewTrie * U_EXPORT2
621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_open(UNewTrie *fillIn,
622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           uint32_t *aliasData, int32_t maxDataLength,
623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           uint32_t initialValue, uint32_t leadUnitValue,
624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           UBool latin1Linear);
625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Clone a build-time trie structure with all entries.
628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param fillIn like in utrie_open(): a pointer to a UNewTrie structure to be
 *               initialized (will not be released), or NULL if one is to be allocated
630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param other the build-time trie structure to clone (passed as pointer to const)
631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param aliasData like in utrie_open(): a caller-provided data array (will not be released);
632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *                  used if aliasDataLength>=(capacity of other's data array)
 *                  NOTE(review): the behavior when aliasData is NULL or too small is not
 *                  stated here; presumably a data array is allocated as in utrie_open() -
 *                  confirm against the implementation.
633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param aliasDataLength the length (capacity) of aliasData; presumably counted in
 *                        uint32_t entries like maxDataLength in utrie_open() - confirm
634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return a pointer to the initialized fillIn or the allocated and initialized new UNewTrie
635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UNewTrie * U_EXPORT2
637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_clone(UNewTrie *fillIn, const UNewTrie *other, uint32_t *aliasData, int32_t aliasDataLength);
638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Close a build-time trie structure, and release memory
641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * that was allocated by utrie_open() or utrie_clone().
642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
 * Storage that the caller supplied to utrie_open() (the fillIn structure
 * and the aliasData array) is documented there as "will not be released",
 * so the caller remains responsible for it; utrie_clone() refers to
 * utrie_open() for the same parameters.
 *
643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param trie the build-time trie to close
644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2
646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_close(UNewTrie *trie);
647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get the data array of a build-time trie.
650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The data may be modified in place through the returned pointer, but entries
651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * that are equal before must still be equal after modification.
 * It is the caller's responsibility to honor this constraint.
652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param trie the build-time trie
654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param pLength (out) a pointer to a variable that receives the number
655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *                of entries (not bytes) in the data array
656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return the data array, as an array of uint32_t entries
657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI uint32_t * U_EXPORT2
659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_getData(UNewTrie *trie, int32_t *pLength);
660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Set a value for a single code point in a build-time trie.
663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param trie the build-time trie
665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param c the code point
666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param value the 32-bit value to store for c
667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return FALSE if a failure occurred (illegal argument or data array overrun)
668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UBool U_EXPORT2
670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_set32(UNewTrie *trie, UChar32 c, uint32_t value);
671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Get the value for a code point as stored in the build-time trie.
674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param trie the build-time trie
676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param c the code point
677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param pInBlockZero if not NULL, then *pInBlockZero is set to TRUE
678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *                     iff the value is retrieved from block 0;
679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *                     block 0 is the all-initial-value initial block,
 *                     so TRUE means that the returned value is the initial value
 *                     read from that shared block
680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return the value stored for c
681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI uint32_t U_EXPORT2
683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_get32(UNewTrie *trie, UChar32 c, UBool *pInBlockZero);
684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Set a value in a half-open range of code points [start..limit[.
687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * All code points c with start<=c<limit will get the value if
688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * overwrite is TRUE or if the old value is 0.
 * NOTE(review): "0" above conflicts with the overwrite parameter description
 * ("non-initial values"); presumably the test is against the initial value
 * that was passed to utrie_open() - confirm against the implementation.
689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param trie the build-time trie
691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param start the first code point to get the value (inclusive)
692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param limit one past the last code point to get the value (exclusive)
693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param value the value
694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param overwrite flag for whether old non-initial values are to be overwritten
695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return FALSE if a failure occurred (illegal argument or data array overrun)
696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UBool U_EXPORT2
698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, UBool overwrite);
699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Compact the build-time trie after all values are set, and then
702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * serialize it into 32-bit aligned memory.
703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * After this, the trie can only be serialized again and/or closed;
705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * no further values can be added.
706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @see utrie_unserialize()
708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param trie the build-time trie
710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param data a pointer to 32-bit-aligned memory that receives the serialized trie
711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param capacity the number of bytes (not entries) available at data
712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param getFoldedValue a callback function that calculates the value for
713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *                       a lead surrogate from all of its supplementary code points
714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *                       and the folding offset;
715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *                       if NULL, then a default function is used which returns just
716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *                       the input offset when there are any non-initial-value entries
717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param reduceTo16Bits flag for whether the values are to be reduced to a
718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *                       width of 16 bits for serialization and runtime
719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param pErrorCode a UErrorCode argument; among other possible error codes:
720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - U_BUFFER_OVERFLOW_ERROR if the data storage block is too small for serialization
721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - U_MEMORY_ALLOCATION_ERROR if the trie data array is too small
722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - U_INDEX_OUTOFBOUNDS_ERROR if the index or data arrays are too long after compaction for serialization
723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return the number of bytes written for the trie
725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2
727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_serialize(UNewTrie *trie, void *data, int32_t capacity,
728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                UNewTrieGetFoldedValue *getFoldedValue,
729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                UBool reduceTo16Bits,
730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                UErrorCode *pErrorCode);
731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Swap a serialized UTrie.
 *
 * NOTE(review): the parameters are not documented in this header. The
 * signature takes a UDataSwapper (declared in udataswp.h), an input buffer
 * with its length, an output buffer and a UErrorCode; presumably it follows
 * the usual udataswp.h swap-function conventions for these arguments and
 * for the int32_t return value - confirm against udataswp.h and the
 * implementation before relying on this.
 *
734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @internal
735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2
737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_swap(const UDataSwapper *ds,
738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           const void *inData, int32_t length, void *outData,
739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           UErrorCode *pErrorCode);
740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* serialization ------------------------------------------------------------ */
742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Trie data structure in serialized form:
745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * UTrieHeader header;
747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * uint16_t index[header.indexLength];
748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * uint16_t data[header.dataLength];
 *
 * The data array is shown above in its 16-bit form; according to the
 * options bit field below (bit 8, UTRIE_OPTIONS_DATA_IS_32_BIT), the data
 * entries are 32 bits wide when that bit is set.
 * As the sketch shows, indexLength and dataLength are used as array
 * dimensions, i.e. they count array entries.
749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @internal
750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef struct UTrieHeader {
752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /** "Trie" in big-endian US-ASCII (0x54726965), i.e. 'T'=0x54 'r'=0x72 'i'=0x69 'e'=0x65 */
753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint32_t signature;
754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * options bit field (bit numbers on the left, bit 0 is the least significant):
757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     *     9    1=Latin-1 data is stored linearly at data+UTRIE_DATA_BLOCK_LENGTH
758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     *     8    0=16-bit data, 1=32-bit data
759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     *  7..4    UTRIE_INDEX_SHIFT   // 0..UTRIE_SHIFT
760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     *  3..0    UTRIE_SHIFT         // 1..9
     *
     * See the UTRIE_OPTIONS_... constants for the matching masks and shifts.
761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint32_t options;
763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /** number of index[] entries; indexLength is a multiple of UTRIE_SURROGATE_BLOCK_COUNT */
765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t indexLength;
766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /** number of data[] entries; dataLength>=UTRIE_DATA_BLOCK_LENGTH */
768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t dataLength;
769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} UTrieHeader;
770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Constants for use with UTrieHeader.options.
 * They correspond to the bit layout documented on UTrieHeader.options:
 * bits 3..0 UTRIE_SHIFT, bits 7..4 UTRIE_INDEX_SHIFT, bit 8 data width,
 * bit 9 linear Latin-1.
773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @internal
774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)enum {
776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /** Mask to get the UTRIE_SHIFT value (options bits 3..0) from options. */
777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UTRIE_OPTIONS_SHIFT_MASK=0xf,
778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
     * Shift options right this much to get the UTRIE_INDEX_SHIFT value.
     * That field occupies bits 7..4 only; bits 8 and 9 above it hold flags,
     * so the shifted result still needs to be masked to 4 bits.
     */
780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UTRIE_OPTIONS_INDEX_SHIFT=4,
781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /** If set (options bit 8), then the data (stage 2) array is 32 bits wide; otherwise 16 bits. */
783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UTRIE_OPTIONS_DATA_IS_32_BIT=0x100,
784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * If set (options bit 9), then Latin-1 data (for U+0000..U+00ff) is stored in the
787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * data (stage 2) array as a simple, linear array at data+UTRIE_DATA_BLOCK_LENGTH.
788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UTRIE_OPTIONS_LATIN1_IS_LINEAR=0x200
790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)};
791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CDECL_END
793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
795