1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)******************************************************************************
3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   Copyright (C) 2001-2009, International Business Machines
5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   Corporation and others.  All Rights Reserved.
6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)******************************************************************************
8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   file name:  utrie.c
9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   encoding:   US-ASCII
10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   tab size:   8 (not used)
11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   indentation:4
12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   created on: 2001oct20
14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   created by: Markus W. Scherer
15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   This is a common implementation of a "folded" trie.
17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   It is a kind of compressed, serializable table of 16- or 32-bit values associated with
18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   Unicode code points (0..0x10ffff).
19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/
20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE_DEBUG
22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#   include <stdio.h>
23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h"
26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cmemory.h"
27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "utrie.h"
28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* miscellaneous ------------------------------------------------------------ */
30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#undef ABS
32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define ABS(x) ((x)>=0 ? (x) : -(x))
33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static U_INLINE UBool
35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) {
36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while(length>0 && *s==*t) {
37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ++s;
38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ++t;
39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        --length;
40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return (UBool)(length==0);
42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Building a trie ----------------------------------------------------------*/
45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UNewTrie * U_EXPORT2
47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_open(UNewTrie *fillIn,
48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           uint32_t *aliasData, int32_t maxDataLength,
49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           uint32_t initialValue, uint32_t leadUnitValue,
50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           UBool latin1Linear) {
51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UNewTrie *trie;
52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t i, j;
53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if( maxDataLength<UTRIE_DATA_BLOCK_LENGTH ||
55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        (latin1Linear && maxDataLength<1024)
56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ) {
57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return NULL;
58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(fillIn!=NULL) {
61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie=fillIn;
62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie=(UNewTrie *)uprv_malloc(sizeof(UNewTrie));
64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(trie==NULL) {
65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return NULL;
66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uprv_memset(trie, 0, sizeof(UNewTrie));
69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->isAllocated= (UBool)(fillIn==NULL);
70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(aliasData!=NULL) {
72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->data=aliasData;
73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->isDataAllocated=FALSE;
74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->data=(uint32_t *)uprv_malloc(maxDataLength*4);
76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(trie->data==NULL) {
77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            uprv_free(trie);
78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return NULL;
79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->isDataAllocated=TRUE;
81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* preallocate and reset the first data block (block index 0) */
84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    j=UTRIE_DATA_BLOCK_LENGTH;
85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(latin1Linear) {
87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* preallocate and reset the first block (number 0) and Latin-1 (U+0000..U+00ff) after that */
88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* made sure above that maxDataLength>=1024 */
89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* set indexes to point to consecutive data blocks */
91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        i=0;
92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        do {
93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* do this at least for trie->index[0] even if that block is only partly used for Latin-1 */
94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            trie->index[i++]=j;
95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            j+=UTRIE_DATA_BLOCK_LENGTH;
96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } while(i<(256>>UTRIE_SHIFT));
97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* reset the initially allocated blocks to the initial value */
100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->dataLength=j;
101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while(j>0) {
102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->data[--j]=initialValue;
103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->leadUnitValue=leadUnitValue;
106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->indexLength=UTRIE_MAX_INDEX_LENGTH;
107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->dataCapacity=maxDataLength;
108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->isLatin1Linear=latin1Linear;
109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->isCompacted=FALSE;
110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return trie;
111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UNewTrie * U_EXPORT2
114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_clone(UNewTrie *fillIn, const UNewTrie *other, uint32_t *aliasData, int32_t aliasDataCapacity) {
115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UNewTrie *trie;
116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool isDataAllocated;
117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* do not clone if other is not valid or already compacted */
119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(other==NULL || other->data==NULL || other->isCompacted) {
120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return NULL;
121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* clone data */
124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(aliasData!=NULL && aliasDataCapacity>=other->dataCapacity) {
125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        isDataAllocated=FALSE;
126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        aliasDataCapacity=other->dataCapacity;
128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        aliasData=(uint32_t *)uprv_malloc(other->dataCapacity*4);
129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(aliasData==NULL) {
130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return NULL;
131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        isDataAllocated=TRUE;
133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie=utrie_open(fillIn, aliasData, aliasDataCapacity,
136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    other->data[0], other->leadUnitValue,
137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    other->isLatin1Linear);
138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(trie==NULL) {
139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        uprv_free(aliasData);
140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        uprv_memcpy(trie->index, other->index, sizeof(trie->index));
142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        uprv_memcpy(trie->data, other->data, other->dataLength*4);
143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->dataLength=other->dataLength;
144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->isDataAllocated=isDataAllocated;
145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return trie;
148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2
151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_close(UNewTrie *trie) {
152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(trie!=NULL) {
153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(trie->isDataAllocated) {
154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            uprv_free(trie->data);
155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            trie->data=NULL;
156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(trie->isAllocated) {
158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            uprv_free(trie);
159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI uint32_t * U_EXPORT2
164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_getData(UNewTrie *trie, int32_t *pLength) {
165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(trie==NULL || pLength==NULL) {
166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return NULL;
167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    *pLength=trie->dataLength;
170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return trie->data;
171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t
174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_allocDataBlock(UNewTrie *trie) {
175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t newBlock, newTop;
176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newBlock=trie->dataLength;
178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newTop=newBlock+UTRIE_DATA_BLOCK_LENGTH;
179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(newTop>trie->dataCapacity) {
180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* out of memory in the data array */
181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return -1;
182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->dataLength=newTop;
184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return newBlock;
185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * No error checking for illegal arguments.
189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return -1 if no new data block available (out of memory in data array)
191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @internal
192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t
194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_getDataBlock(UNewTrie *trie, UChar32 c) {
195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t indexValue, newBlock;
196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    c>>=UTRIE_SHIFT;
198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    indexValue=trie->index[c];
199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(indexValue>0) {
200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return indexValue;
201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* allocate a new data block */
204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newBlock=utrie_allocDataBlock(trie);
205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(newBlock<0) {
206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* out of memory in the data array */
207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return -1;
208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->index[c]=newBlock;
210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* copy-on-write for a block from a setRange() */
212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uprv_memcpy(trie->data+newBlock, trie->data-indexValue, 4*UTRIE_DATA_BLOCK_LENGTH);
213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return newBlock;
214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return TRUE if the value was successfully set
218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UBool U_EXPORT2
220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_set32(UNewTrie *trie, UChar32 c, uint32_t value) {
221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t block;
222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* valid, uncompacted trie and valid c? */
224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(trie==NULL || trie->isCompacted || (uint32_t)c>0x10ffff) {
225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return FALSE;
226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    block=utrie_getDataBlock(trie, c);
229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(block<0) {
230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return FALSE;
231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->data[block+(c&UTRIE_MASK)]=value;
234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return TRUE;
235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI uint32_t U_EXPORT2
238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_get32(UNewTrie *trie, UChar32 c, UBool *pInBlockZero) {
239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t block;
240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* valid, uncompacted trie and valid c? */
242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(trie==NULL || trie->isCompacted || (uint32_t)c>0x10ffff) {
243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(pInBlockZero!=NULL) {
244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *pInBlockZero=TRUE;
245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0;
247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    block=trie->index[c>>UTRIE_SHIFT];
250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(pInBlockZero!=NULL) {
251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pInBlockZero= (UBool)(block==0);
252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return trie->data[ABS(block)+(c&UTRIE_MASK)];
255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @internal
259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void
261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_fillBlock(uint32_t *block, UChar32 start, UChar32 limit,
262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                uint32_t value, uint32_t initialValue, UBool overwrite) {
263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint32_t *pLimit;
264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    pLimit=block+limit;
266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    block+=start;
267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(overwrite) {
268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while(block<pLimit) {
269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *block++=value;
270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while(block<pLimit) {
273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(*block==initialValue) {
274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                *block=value;
275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            ++block;
277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UBool U_EXPORT2
282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, UBool overwrite) {
283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /*
284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * repeat value in [start..limit[
285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * mark index values for repeat-data blocks by setting bit 31 of the index values
286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * fill around existing values if any, if(overwrite)
287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint32_t initialValue;
289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t block, rest, repeatBlock;
290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* valid, uncompacted trie and valid indexes? */
292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if( trie==NULL || trie->isCompacted ||
293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        (uint32_t)start>0x10ffff || (uint32_t)limit>0x110000 || start>limit
294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ) {
295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return FALSE;
296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(start==limit) {
298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return TRUE; /* nothing to do */
299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    initialValue=trie->data[0];
302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(start&UTRIE_MASK) {
303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 nextStart;
304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* set partial block at [start..following block boundary[ */
306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        block=utrie_getDataBlock(trie, start);
307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(block<0) {
308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return FALSE;
309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        nextStart=(start+UTRIE_DATA_BLOCK_LENGTH)&~UTRIE_MASK;
312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(nextStart<=limit) {
313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            utrie_fillBlock(trie->data+block, start&UTRIE_MASK, UTRIE_DATA_BLOCK_LENGTH,
314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            value, initialValue, overwrite);
315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            start=nextStart;
316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            utrie_fillBlock(trie->data+block, start&UTRIE_MASK, limit&UTRIE_MASK,
318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            value, initialValue, overwrite);
319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return TRUE;
320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* number of positions in the last, partial block */
324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    rest=limit&UTRIE_MASK;
325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* round down limit to a block boundary */
327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    limit&=~UTRIE_MASK;
328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* iterate over all-value blocks */
330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(value==initialValue) {
331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        repeatBlock=0;
332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        repeatBlock=-1;
334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while(start<limit) {
336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* get index value */
337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        block=trie->index[start>>UTRIE_SHIFT];
338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(block>0) {
339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* already allocated, fill in value */
340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            utrie_fillBlock(trie->data+block, 0, UTRIE_DATA_BLOCK_LENGTH, value, initialValue, overwrite);
341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else if(trie->data[-block]!=value && (block==0 || overwrite)) {
342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* set the repeatBlock instead of the current block 0 or range block */
343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(repeatBlock>=0) {
344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                trie->index[start>>UTRIE_SHIFT]=-repeatBlock;
345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } else {
346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                /* create and set and fill the repeatBlock */
347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                repeatBlock=utrie_getDataBlock(trie, start);
348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(repeatBlock<0) {
349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    return FALSE;
350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                /* set the negative block number to indicate that it is a repeat block */
353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                trie->index[start>>UTRIE_SHIFT]=-repeatBlock;
354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                utrie_fillBlock(trie->data+repeatBlock, 0, UTRIE_DATA_BLOCK_LENGTH, value, initialValue, TRUE);
355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        start+=UTRIE_DATA_BLOCK_LENGTH;
359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(rest>0) {
362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* set partial block at [last block boundary..limit[ */
363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        block=utrie_getDataBlock(trie, start);
364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(block<0) {
365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return FALSE;
366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        utrie_fillBlock(trie->data+block, 0, rest, value, initialValue, overwrite);
369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return TRUE;
372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t
375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)_findSameIndexBlock(const int32_t *idx, int32_t indexLength,
376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    int32_t otherBlock) {
377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t block, i;
378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(block=UTRIE_BMP_INDEX_LENGTH; block<indexLength; block+=UTRIE_SURROGATE_BLOCK_COUNT) {
380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for(i=0; i<UTRIE_SURROGATE_BLOCK_COUNT; ++i) {
381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(idx[block+i]!=idx[otherBlock+i]) {
382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                break;
383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(i==UTRIE_SURROGATE_BLOCK_COUNT) {
386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return block;
387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return indexLength;
390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Fold the normalization data for supplementary code points into
394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * a compact area on top of the BMP-part of the trie index,
395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * with the lead surrogates indexing this compact area.
396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Duplicate the index values for lead surrogates:
398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * From inside the BMP area, where some may be overridden with folded values,
399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * to just after the BMP area, where they can be retrieved for
400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * code point lookups.
401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void
403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_fold(UNewTrie *trie, UNewTrieGetFoldedValue *getFoldedValue, UErrorCode *pErrorCode) {
404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t leadIndexes[UTRIE_SURROGATE_BLOCK_COUNT];
405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t *idx;
406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint32_t value;
407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32 c;
408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t indexLength, block;
409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE_DEBUG
410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int countLeadCUWithData=0;
411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    idx=trie->index;
414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* copy the lead surrogate indexes into a temporary array */
416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uprv_memcpy(leadIndexes, idx+(0xd800>>UTRIE_SHIFT), 4*UTRIE_SURROGATE_BLOCK_COUNT);
417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /*
419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * set all values for lead surrogate code *units* to leadUnitValue
420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * so that, by default, runtime lookups will find no data for associated
421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * supplementary code points, unless there is data for such code points
422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * which will result in a non-zero folding value below that is set for
423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * the respective lead units
424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     *
425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * the above saved the indexes for surrogate code *points*
426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * fill the indexes with simplified code from utrie_setRange32()
427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(trie->leadUnitValue==trie->data[0]) {
429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        block=0; /* leadUnitValue==initialValue, use all-initial-value block */
430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* create and fill the repeatBlock */
432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        block=utrie_allocDataBlock(trie);
433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(block<0) {
434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* data table overflow */
435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return;
437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        utrie_fillBlock(trie->data+block, 0, UTRIE_DATA_BLOCK_LENGTH, trie->leadUnitValue, trie->data[0], TRUE);
439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        block=-block; /* negative block number to indicate that it is a repeat block */
440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(c=(0xd800>>UTRIE_SHIFT); c<(0xdc00>>UTRIE_SHIFT); ++c) {
442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->index[c]=block;
443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /*
446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Fold significant index values into the area just after the BMP indexes.
447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * In case the first lead surrogate has significant data,
448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * its index block must be used first (in which case the folding is a no-op).
449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Later all folded index blocks are moved up one to insert the copied
450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * lead surrogate indexes.
451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    indexLength=UTRIE_BMP_INDEX_LENGTH;
453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* search for any index (stage 1) entries for supplementary code points */
455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(c=0x10000; c<0x110000;) {
456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(idx[c>>UTRIE_SHIFT]!=0) {
457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* there is data, treat the full block for a lead surrogate */
458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            c&=~0x3ff;
459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE_DEBUG
461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            ++countLeadCUWithData;
462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* printf("supplementary data for lead surrogate U+%04lx\n", (long)(0xd7c0+(c>>10))); */
463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* is there an identical index block? */
466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            block=_findSameIndexBlock(idx, indexLength, c>>UTRIE_SHIFT);
467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /*
469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             * get a folded value for [c..c+0x400[ and,
470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             * if different from the value for the lead surrogate code point,
471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             * set it for the lead surrogate code unit
472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             */
473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            value=getFoldedValue(trie, c, block+UTRIE_SURROGATE_BLOCK_COUNT);
474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(value!=utrie_get32(trie, U16_LEAD(c), NULL)) {
475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(!utrie_set32(trie, U16_LEAD(c), value)) {
476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    /* data table overflow */
477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    return;
479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                /* if we did not find an identical index block... */
482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(block==indexLength) {
483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    /* move the actual index (stage 1) entries from the supplementary position to the new one */
484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    uprv_memmove(idx+indexLength,
485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                 idx+(c>>UTRIE_SHIFT),
486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                 4*UTRIE_SURROGATE_BLOCK_COUNT);
487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    indexLength+=UTRIE_SURROGATE_BLOCK_COUNT;
488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            c+=0x400;
491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            c+=UTRIE_DATA_BLOCK_LENGTH;
493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE_DEBUG
496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(countLeadCUWithData>0) {
497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        printf("supplementary data for %d lead surrogates\n", countLeadCUWithData);
498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /*
502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * index array overflow?
503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * This is to guarantee that a folding offset is of the form
504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023.
505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * If the index is too large, then n>=1024 and more than 10 bits are necessary.
506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     *
507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * In fact, it can only ever become n==1024 with completely unfoldable data and
508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * the additional block of duplicated values for lead surrogates.
509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(indexLength>=UTRIE_MAX_INDEX_LENGTH) {
511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /*
516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * make space for the lead surrogate index block and
517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * insert it between the BMP indexes and the folded ones
518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uprv_memmove(idx+UTRIE_BMP_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT,
520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                 idx+UTRIE_BMP_INDEX_LENGTH,
521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                 4*(indexLength-UTRIE_BMP_INDEX_LENGTH));
522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uprv_memcpy(idx+UTRIE_BMP_INDEX_LENGTH,
523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                leadIndexes,
524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                4*UTRIE_SURROGATE_BLOCK_COUNT);
525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    indexLength+=UTRIE_SURROGATE_BLOCK_COUNT;
526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE_DEBUG
528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    printf("trie index count: BMP %ld  all Unicode %ld  folded %ld\n",
529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           UTRIE_BMP_INDEX_LENGTH, (long)UTRIE_MAX_INDEX_LENGTH, indexLength);
530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->indexLength=indexLength;
533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Set a value in the trie index map to indicate which data block
537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * is referenced and which one is not.
538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * utrie_compact() will remove data blocks that are not used at all.
539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Set
540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - 0 if it is used
541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - -1 if it is not used
542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void
544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)_findUnusedBlocks(UNewTrie *trie) {
545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t i;
546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* fill the entire map with "not used" */
548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uprv_memset(trie->map, 0xff, (UTRIE_MAX_BUILD_TIME_DATA_LENGTH>>UTRIE_SHIFT)*4);
549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* mark each block that _is_ used with 0 */
551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(i=0; i<trie->indexLength; ++i) {
552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->map[ABS(trie->index[i])>>UTRIE_SHIFT]=0;
553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* never move the all-initial-value block 0 */
556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->map[0]=0;
557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t
560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)_findSameDataBlock(const uint32_t *data, int32_t dataLength,
561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                   int32_t otherBlock, int32_t step) {
562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t block;
563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* ensure that we do not even partially get past dataLength */
565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    dataLength-=UTRIE_DATA_BLOCK_LENGTH;
566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(block=0; block<=dataLength; block+=step) {
568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(equal_uint32(data+block, data+otherBlock, UTRIE_DATA_BLOCK_LENGTH)) {
569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return block;
570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return -1;
573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Compact a folded build-time trie.
577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The compaction
579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - removes blocks that are identical with earlier ones
580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - overlaps adjacent blocks as much as possible (if overlap==TRUE)
581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - moves blocks in steps of the data granularity
582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - moves and overlaps blocks that overlap with multiple values in the overlap region
583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * It does not
585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - try to move and overlap blocks that are not already adjacent
586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void
588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_compact(UNewTrie *trie, UBool overlap, UErrorCode *pErrorCode) {
589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t i, start, newStart, overlapStart;
590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* valid, uncompacted trie? */
596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(trie==NULL) {
597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(trie->isCompacted) {
601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return; /* nothing left to do */
602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* compaction */
605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* initialize the index map with "block is used/unused" flags */
607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    _findUnusedBlocks(trie);
608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* if Latin-1 is preallocated and linear, then do not compact Latin-1 data */
610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(trie->isLatin1Linear && UTRIE_SHIFT<=8) {
611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        overlapStart=UTRIE_DATA_BLOCK_LENGTH+256;
612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        overlapStart=UTRIE_DATA_BLOCK_LENGTH;
614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newStart=UTRIE_DATA_BLOCK_LENGTH;
617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(start=newStart; start<trie->dataLength;) {
618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /*
619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         * start: index of first entry of current block
620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         * newStart: index where the current block is to be moved
621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         *           (right after current end of already-compacted data)
622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         */
623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* skip blocks that are not used */
625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(trie->map[start>>UTRIE_SHIFT]<0) {
626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* advance start to the next block */
627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            start+=UTRIE_DATA_BLOCK_LENGTH;
628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* leave newStart with the previous block! */
630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* search for an identical block */
634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if( start>=overlapStart &&
635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (i=_findSameDataBlock(trie->data, newStart, start,
636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            overlap ? UTRIE_DATA_GRANULARITY : UTRIE_DATA_BLOCK_LENGTH))
637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             >=0
638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ) {
639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* found an identical block, set the other block's index value for the current block */
640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            trie->map[start>>UTRIE_SHIFT]=i;
641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* advance start to the next block */
643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            start+=UTRIE_DATA_BLOCK_LENGTH;
644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* leave newStart with the previous block! */
646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* see if the beginning of this block can be overlapped with the end of the previous block */
650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(overlap && start>=overlapStart) {
651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            for(i=UTRIE_DATA_BLOCK_LENGTH-UTRIE_DATA_GRANULARITY;
653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                i>0 && !equal_uint32(trie->data+(newStart-i), trie->data+start, i);
654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                i-=UTRIE_DATA_GRANULARITY) {}
655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            i=0;
657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(i>0) {
660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* some overlap */
661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            trie->map[start>>UTRIE_SHIFT]=newStart-i;
662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* move the non-overlapping indexes to their new positions */
664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            start+=i;
665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            for(i=UTRIE_DATA_BLOCK_LENGTH-i; i>0; --i) {
666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                trie->data[newStart++]=trie->data[start++];
667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else if(newStart<start) {
669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* no overlap, just move the indexes to their new positions */
670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            trie->map[start>>UTRIE_SHIFT]=newStart;
671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            for(i=UTRIE_DATA_BLOCK_LENGTH; i>0; --i) {
672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                trie->data[newStart++]=trie->data[start++];
673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else /* no overlap && newStart==start */ {
675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            trie->map[start>>UTRIE_SHIFT]=start;
676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            newStart+=UTRIE_DATA_BLOCK_LENGTH;
677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            start=newStart;
678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* now adjust the index (stage 1) table */
682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(i=0; i<trie->indexLength; ++i) {
683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->index[i]=trie->map[ABS(trie->index[i])>>UTRIE_SHIFT];
684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE_DEBUG
687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* we saved some space */
688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    printf("compacting trie: count of 32-bit words %lu->%lu\n",
689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (long)trie->dataLength, (long)newStart);
690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->dataLength=newStart;
693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* serialization ------------------------------------------------------------ */
696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Default function for the folding value:
699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Just store the offset (16 bits) if there is any non-initial-value entry.
700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The offset parameter is never 0.
702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Returning the offset itself is safe for UTRIE_SHIFT>=5 because
703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * for UTRIE_SHIFT==5 the maximum index length is UTRIE_MAX_INDEX_LENGTH==0x8800
704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * which fits into 16-bit trie values;
705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * for higher UTRIE_SHIFT, UTRIE_MAX_INDEX_LENGTH decreases.
706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Theoretically, it would be safer for all possible UTRIE_SHIFT including
708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * those of 4 and lower to return offset>>UTRIE_SURROGATE_BLOCK_BITS
709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * which would always result in a value of 0x40..0x43f
710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * (start/end 1k blocks of supplementary Unicode code points).
711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * However, this would be uglier, and would not work for some existing
712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * binary data file formats.
713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Also, we do not plan to change UTRIE_SHIFT because it would change binary
715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * data file formats, and we would probably not make it smaller because of
716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the then even larger BMP index length even for empty tries.
717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static uint32_t U_CALLCONV
719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)defaultGetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) {
720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint32_t value, initialValue;
721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32 limit;
722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool inBlockZero;
723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    initialValue=trie->data[0];
725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    limit=start+0x400;
726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while(start<limit) {
727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        value=utrie_get32(trie, start, &inBlockZero);
728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(inBlockZero) {
729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            start+=UTRIE_DATA_BLOCK_LENGTH;
730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else if(value!=initialValue) {
731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return (uint32_t)offset;
732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            ++start;
734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return 0;
737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2
740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_serialize(UNewTrie *trie, void *dt, int32_t capacity,
741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                UNewTrieGetFoldedValue *getFoldedValue,
742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                UBool reduceTo16Bits,
743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                UErrorCode *pErrorCode) {
744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UTrieHeader *header;
745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint32_t *p;
746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint16_t *dest16;
747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t i, length;
748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint8_t* data = NULL;
749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* argument check */
751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0;
753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(trie==NULL || capacity<0 || (capacity>0 && dt==NULL)) {
756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0;
758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(getFoldedValue==NULL) {
760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        getFoldedValue=defaultGetFoldedValue;
761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    data = (uint8_t*)dt;
764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* fold and compact if necessary, also checks that indexLength is within limits */
765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(!trie->isCompacted) {
766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* compact once without overlap to improve folding */
767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        utrie_compact(trie, FALSE, pErrorCode);
768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* fold the supplementary part of the index array */
770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        utrie_fold(trie, getFoldedValue, pErrorCode);
771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* compact again with overlap for minimum data array length */
773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        utrie_compact(trie, TRUE, pErrorCode);
774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->isCompacted=TRUE;
776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(U_FAILURE(*pErrorCode)) {
777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return 0;
778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* is dataLength within limits? */
782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if( (reduceTo16Bits ? (trie->dataLength+trie->indexLength) : trie->dataLength) >= UTRIE_MAX_DATA_LENGTH) {
783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    length=sizeof(UTrieHeader)+2*trie->indexLength;
787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(reduceTo16Bits) {
788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        length+=2*trie->dataLength;
789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        length+=4*trie->dataLength;
791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(length>capacity) {
794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return length; /* preflighting */
795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE_DEBUG
798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    printf("**UTrieLengths(serialize)** index:%6ld  data:%6ld  serialized:%6ld\n",
799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           (long)trie->indexLength, (long)trie->dataLength, (long)length);
800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* set the header fields */
803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    header=(UTrieHeader *)data;
804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    data+=sizeof(UTrieHeader);
805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    header->signature=0x54726965; /* "Trie" */
807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    header->options=UTRIE_SHIFT | (UTRIE_INDEX_SHIFT<<UTRIE_OPTIONS_INDEX_SHIFT);
808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(!reduceTo16Bits) {
810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        header->options|=UTRIE_OPTIONS_DATA_IS_32_BIT;
811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(trie->isLatin1Linear) {
813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        header->options|=UTRIE_OPTIONS_LATIN1_IS_LINEAR;
814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    header->indexLength=trie->indexLength;
817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    header->dataLength=trie->dataLength;
818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* write the index (stage 1) array and the 16/32-bit data (stage 2) array */
820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(reduceTo16Bits) {
821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT, after adding indexLength */
822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        p=(uint32_t *)trie->index;
823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        dest16=(uint16_t *)data;
824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for(i=trie->indexLength; i>0; --i) {
825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *dest16++=(uint16_t)((*p++ + trie->indexLength)>>UTRIE_INDEX_SHIFT);
826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* write 16-bit data values */
829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        p=trie->data;
830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for(i=trie->dataLength; i>0; --i) {
831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *dest16++=(uint16_t)*p++;
832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT */
835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        p=(uint32_t *)trie->index;
836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        dest16=(uint16_t *)data;
837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for(i=trie->indexLength; i>0; --i) {
838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *dest16++=(uint16_t)(*p++ >> UTRIE_INDEX_SHIFT);
839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
840f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
841f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* write 32-bit data values */
842f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        uprv_memcpy(dest16, trie->data, 4*trie->dataLength);
843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return length;
846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* inverse to defaultGetFoldedValue() */
849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2
850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_defaultGetFoldingOffset(uint32_t data) {
851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return (int32_t)data;
852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2
855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_unserialize(UTrie *trie, const void *data, int32_t length, UErrorCode *pErrorCode) {
856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const UTrieHeader *header;
857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const uint16_t *p16;
858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint32_t options;
859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return -1;
862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* enough data for a trie header? */
865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(length<sizeof(UTrieHeader)) {
866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_INVALID_FORMAT_ERROR;
867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return -1;
868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* check the signature */
871f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    header=(const UTrieHeader *)data;
872f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(header->signature!=0x54726965) {
873f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_INVALID_FORMAT_ERROR;
874f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return -1;
875f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
876f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
877f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* get the options and check the shift values */
878f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    options=header->options;
879f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if( (options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT ||
880f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ((options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT
881f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ) {
882f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_INVALID_FORMAT_ERROR;
883f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return -1;
884f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
885f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->isLatin1Linear= (UBool)((options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0);
886f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
887f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* get the length values */
888f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->indexLength=header->indexLength;
889f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->dataLength=header->dataLength;
890f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
891f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    length-=(int32_t)sizeof(UTrieHeader);
892f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
893f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* enough data for the index? */
894f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(length<2*trie->indexLength) {
895f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_INVALID_FORMAT_ERROR;
896f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return -1;
897f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
898f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    p16=(const uint16_t *)(header+1);
899f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->index=p16;
900f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    p16+=trie->indexLength;
901f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    length-=2*trie->indexLength;
902f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
903f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* get the data */
904f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(options&UTRIE_OPTIONS_DATA_IS_32_BIT) {
905f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(length<4*trie->dataLength) {
906f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *pErrorCode=U_INVALID_FORMAT_ERROR;
907f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return -1;
908f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
909f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->data32=(const uint32_t *)p16;
910f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->initialValue=trie->data32[0];
911f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        length=(int32_t)sizeof(UTrieHeader)+2*trie->indexLength+4*trie->dataLength;
912f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
913f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(length<2*trie->dataLength) {
914f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *pErrorCode=U_INVALID_FORMAT_ERROR;
915f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return -1;
916f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
917f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
918f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* the "data16" data is used via the index pointer */
919f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->data32=NULL;
920f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->initialValue=trie->index[trie->indexLength];
921f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        length=(int32_t)sizeof(UTrieHeader)+2*trie->indexLength+2*trie->dataLength;
922f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
923f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
924f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->getFoldingOffset=utrie_defaultGetFoldingOffset;
925f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
926f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return length;
927f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
928f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
929f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2
930f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_unserializeDummy(UTrie *trie,
931f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       void *data, int32_t length,
932f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       uint32_t initialValue, uint32_t leadUnitValue,
933f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       UBool make16BitTrie,
934f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       UErrorCode *pErrorCode) {
935f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint16_t *p16;
936f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t actualLength, latin1Length, i, limit;
937f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint16_t block;
938f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
939f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
940f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return -1;
941f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
942f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
943f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* calculate the actual size of the dummy trie data */
944f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
945f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* max(Latin-1, block 0) */
946f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    latin1Length= UTRIE_SHIFT<=8 ? 256 : UTRIE_DATA_BLOCK_LENGTH;
947f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
948f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->indexLength=UTRIE_BMP_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT;
949f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->dataLength=latin1Length;
950f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(leadUnitValue!=initialValue) {
951f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->dataLength+=UTRIE_DATA_BLOCK_LENGTH;
952f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
953f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
954f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    actualLength=trie->indexLength*2;
955f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(make16BitTrie) {
956f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        actualLength+=trie->dataLength*2;
957f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
958f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        actualLength+=trie->dataLength*4;
959f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
960f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
961f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* enough space for the dummy trie? */
962f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(length<actualLength) {
963f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
964f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return actualLength;
965f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
966f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
967f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->isLatin1Linear=TRUE;
968f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->initialValue=initialValue;
969f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
970f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* fill the index and data arrays */
971f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    p16=(uint16_t *)data;
972f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->index=p16;
973f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
974f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(make16BitTrie) {
975f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* indexes to block 0 */
976f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        block=(uint16_t)(trie->indexLength>>UTRIE_INDEX_SHIFT);
977f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        limit=trie->indexLength;
978f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for(i=0; i<limit; ++i) {
979f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            p16[i]=block;
980f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
981f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
982f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(leadUnitValue!=initialValue) {
983f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* indexes for lead surrogate code units to the block after Latin-1 */
984f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            block+=(uint16_t)(latin1Length>>UTRIE_INDEX_SHIFT);
985f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            i=0xd800>>UTRIE_SHIFT;
986f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            limit=0xdc00>>UTRIE_SHIFT;
987f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            for(; i<limit; ++i) {
988f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                p16[i]=block;
989f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
990f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
991f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
992f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->data32=NULL;
993f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
994f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* Latin-1 data */
995f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        p16+=trie->indexLength;
996f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for(i=0; i<latin1Length; ++i) {
997f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            p16[i]=(uint16_t)initialValue;
998f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
999f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1000f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* data for lead surrogate code units */
1001f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(leadUnitValue!=initialValue) {
1002f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            limit=latin1Length+UTRIE_DATA_BLOCK_LENGTH;
1003f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            for(/* i=latin1Length */; i<limit; ++i) {
1004f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                p16[i]=(uint16_t)leadUnitValue;
1005f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1006f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1007f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
1008f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        uint32_t *p32;
1009f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1010f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* indexes to block 0 */
1011f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        uprv_memset(p16, 0, trie->indexLength*2);
1012f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1013f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(leadUnitValue!=initialValue) {
1014f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* indexes for lead surrogate code units to the block after Latin-1 */
1015f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            block=(uint16_t)(latin1Length>>UTRIE_INDEX_SHIFT);
1016f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            i=0xd800>>UTRIE_SHIFT;
1017f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            limit=0xdc00>>UTRIE_SHIFT;
1018f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            for(; i<limit; ++i) {
1019f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                p16[i]=block;
1020f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1021f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1022f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1023f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->data32=p32=(uint32_t *)(p16+trie->indexLength);
1024f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1025f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* Latin-1 data */
1026f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for(i=0; i<latin1Length; ++i) {
1027f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            p32[i]=initialValue;
1028f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1029f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1030f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* data for lead surrogate code units */
1031f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(leadUnitValue!=initialValue) {
1032f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            limit=latin1Length+UTRIE_DATA_BLOCK_LENGTH;
1033f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            for(/* i=latin1Length */; i<limit; ++i) {
1034f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                p32[i]=leadUnitValue;
1035f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1036f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1037f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1038f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1039f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->getFoldingOffset=utrie_defaultGetFoldingOffset;
1040f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1041f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return actualLength;
1042f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1043f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1044f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* enumeration -------------------------------------------------------------- */
1045f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1046f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* default UTrieEnumValue() returns the input value itself */
1047f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static uint32_t U_CALLCONV
1048f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)enumSameValue(const void *context, uint32_t value) {
1049f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return value;
1050f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1051f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1052f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
1053f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Enumerate all ranges of code points with the same relevant values.
1054f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The values are transformed from the raw trie entries by the enumValue function.
1055f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
1056f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2
1057f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_enum(const UTrie *trie,
1058f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           UTrieEnumValue *enumValue, UTrieEnumRange *enumRange, const void *context) {
1059f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const uint32_t *data32;
1060f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const uint16_t *idx;
1061f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1062f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint32_t value, prevValue, initialValue;
1063f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32 c, prev;
1064f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t l, i, j, block, prevBlock, nullBlock, offset;
1065f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1066f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* check arguments */
1067f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(trie==NULL || trie->index==NULL || enumRange==NULL) {
1068f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
1069f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1070f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(enumValue==NULL) {
1071f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        enumValue=enumSameValue;
1072f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1073f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1074f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    idx=trie->index;
1075f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    data32=trie->data32;
1076f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1077f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* get the enumeration value that corresponds to an initial-value trie data entry */
1078f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    initialValue=enumValue(context, trie->initialValue);
1079f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1080f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(data32==NULL) {
1081f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        nullBlock=trie->indexLength;
1082f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
1083f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        nullBlock=0;
1084f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1085f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1086f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* set variables for previous range */
1087f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    prevBlock=nullBlock;
1088f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    prev=0;
1089f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    prevValue=initialValue;
1090f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1091f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* enumerate BMP - the main loop enumerates data blocks */
1092f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(i=0, c=0; c<=0xffff; ++i) {
1093f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(c==0xd800) {
1094f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* skip lead surrogate code _units_, go to lead surr. code _points_ */
1095f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            i=UTRIE_BMP_INDEX_LENGTH;
1096f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else if(c==0xdc00) {
1097f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* go back to regular BMP code points */
1098f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            i=c>>UTRIE_SHIFT;
1099f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        block=idx[i]<<UTRIE_INDEX_SHIFT;
1102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(block==prevBlock) {
1103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* the block is the same as the previous one, and filled with value */
1104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            c+=UTRIE_DATA_BLOCK_LENGTH;
1105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else if(block==nullBlock) {
1106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* this is the all-initial-value block */
1107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(prevValue!=initialValue) {
1108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(prev<c) {
1109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if(!enumRange(context, prev, c, prevValue)) {
1110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        return;
1111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
1112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
1113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                prevBlock=nullBlock;
1114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                prev=c;
1115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                prevValue=initialValue;
1116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            c+=UTRIE_DATA_BLOCK_LENGTH;
1118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
1119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            prevBlock=block;
1120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            for(j=0; j<UTRIE_DATA_BLOCK_LENGTH; ++j) {
1121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]);
1122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(value!=prevValue) {
1123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if(prev<c) {
1124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        if(!enumRange(context, prev, c, prevValue)) {
1125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            return;
1126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        }
1127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
1128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if(j>0) {
1129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        /* the block is not filled with all the same value */
1130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        prevBlock=-1;
1131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
1132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    prev=c;
1133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    prevValue=value;
1134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
1135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                ++c;
1136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* enumerate supplementary code points */
1141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(l=0xd800; l<0xdc00;) {
1142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* lead surrogate access */
1143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        offset=idx[l>>UTRIE_SHIFT]<<UTRIE_INDEX_SHIFT;
1144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(offset==nullBlock) {
1145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* no entries for a whole block of lead surrogates */
1146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(prevValue!=initialValue) {
1147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(prev<c) {
1148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if(!enumRange(context, prev, c, prevValue)) {
1149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        return;
1150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
1151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
1152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                prevBlock=nullBlock;
1153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                prev=c;
1154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                prevValue=initialValue;
1155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            l+=UTRIE_DATA_BLOCK_LENGTH;
1158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            c+=UTRIE_DATA_BLOCK_LENGTH<<10;
1159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
1160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        value= data32!=NULL ? data32[offset+(l&UTRIE_MASK)] : idx[offset+(l&UTRIE_MASK)];
1163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* enumerate trail surrogates for this lead surrogate */
1165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        offset=trie->getFoldingOffset(value);
1166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(offset<=0) {
1167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* no data for this lead surrogate */
1168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(prevValue!=initialValue) {
1169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(prev<c) {
1170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if(!enumRange(context, prev, c, prevValue)) {
1171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        return;
1172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
1173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
1174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                prevBlock=nullBlock;
1175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                prev=c;
1176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                prevValue=initialValue;
1177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* nothing else to do for the supplementary code points for this lead surrogate */
1180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            c+=0x400;
1181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
1182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* enumerate code points for this lead surrogate */
1183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            i=offset;
1184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            offset+=UTRIE_SURROGATE_BLOCK_COUNT;
1185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            do {
1186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                /* copy of most of the body of the BMP loop */
1187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                block=idx[i]<<UTRIE_INDEX_SHIFT;
1188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(block==prevBlock) {
1189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    /* the block is the same as the previous one, and filled with value */
1190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    c+=UTRIE_DATA_BLOCK_LENGTH;
1191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                } else if(block==nullBlock) {
1192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    /* this is the all-initial-value block */
1193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if(prevValue!=initialValue) {
1194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        if(prev<c) {
1195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            if(!enumRange(context, prev, c, prevValue)) {
1196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                return;
1197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            }
1198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        }
1199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        prevBlock=nullBlock;
1200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        prev=c;
1201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        prevValue=initialValue;
1202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
1203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    c+=UTRIE_DATA_BLOCK_LENGTH;
1204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                } else {
1205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    prevBlock=block;
1206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    for(j=0; j<UTRIE_DATA_BLOCK_LENGTH; ++j) {
1207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]);
1208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        if(value!=prevValue) {
1209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            if(prev<c) {
1210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                if(!enumRange(context, prev, c, prevValue)) {
1211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                    return;
1212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                }
1213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            }
1214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            if(j>0) {
1215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                /* the block is not filled with all the same value */
1216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                prevBlock=-1;
1217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            }
1218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            prev=c;
1219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            prevValue=value;
1220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        }
1221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        ++c;
1222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
1223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
1224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } while(++i<offset);
1225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ++l;
1228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* deliver last range */
1231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    enumRange(context, prev, c, prevValue);
1232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1233