1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)******************************************************************************
3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   Copyright (C) 2001-2010, International Business Machines
5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   Corporation and others.  All Rights Reserved.
6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)******************************************************************************
8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   file name:  utrie2_builder.c
9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   encoding:   US-ASCII
10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   tab size:   8 (not used)
11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   indentation:4
12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   created on: 2008sep26 (split off from utrie2.c)
14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   created by: Markus W. Scherer
15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   This is a common implementation of a Unicode trie.
17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   It is a kind of compressed, serializable table of 16- or 32-bit values associated with
18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   Unicode code points (0..0x10ffff).
19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   This is the second common version of a Unicode trie (hence the name UTrie2).
20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   See utrie2.h for a comparison.
21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*
22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   This file contains only the builder code.
23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*   See utrie2.c for the runtime and enumeration code.
24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/
25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE2_DEBUG
26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#   include <stdio.h>
27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h"
30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cmemory.h"
31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "utrie2.h"
32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "utrie2_impl.h"
33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "utrie.h" /* for utrie2_fromUTrie() and utrie_swap() */
35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Implementation notes ----------------------------------------------------- */
39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The UTRIE2_SHIFT_1, UTRIE2_SHIFT_2, UTRIE2_INDEX_SHIFT and other values
42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * have been chosen to minimize trie sizes overall.
43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Most of the code is flexible enough to work with a range of values,
44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * within certain limits.
45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Exception: Support for separate values for lead surrogate code _units_
47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * vs. code _points_ was added after the constants were fixed,
48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and has not been tested nor particularly designed for different constant values.
49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * (Especially the utrie2_enum() code that jumps to the special LSCP index-2
50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * part and back.)
51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Requires UTRIE2_SHIFT_2<=6. Otherwise 0xc0 which is the top of the ASCII-linear data
53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * including the bad-UTF-8-data block is not a multiple of UTRIE2_DATA_BLOCK_LENGTH
54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and map[block>>UTRIE2_SHIFT_2] (used in reference counting and compaction
55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * remapping) stops working.
56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Requires UTRIE2_SHIFT_1>=10 because utrie2_enumForLeadSurrogate()
58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * assumes that a single index-2 block is used for 0x400 code points
59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * corresponding to one lead surrogate.
60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Requires UTRIE2_SHIFT_1<=16. Otherwise one single index-2 block contains
62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * more than one Unicode plane, and the split of the index-2 table into a BMP
63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * part and a supplementary part, with a gap in between, would not work.
64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Requires UTRIE2_INDEX_SHIFT>=1 not because of the code but because
66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * there is data with more than 64k distinct values,
67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * for example for Unihan collation with a separate collation weight per
68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Han character.
69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Building a trie ----------------------------------------------------------*/
72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)enum {
74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /** The null index-2 block, following the gap in the index-2 table. */
75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UNEWTRIE2_INDEX_2_NULL_OFFSET=UNEWTRIE2_INDEX_GAP_OFFSET+UNEWTRIE2_INDEX_GAP_LENGTH,
76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /** The start of allocated index-2 blocks. */
78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UNEWTRIE2_INDEX_2_START_OFFSET=UNEWTRIE2_INDEX_2_NULL_OFFSET+UTRIE2_INDEX_2_BLOCK_LENGTH,
79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * The null data block.
82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Length 64=0x40 even if UTRIE2_DATA_BLOCK_LENGTH is smaller,
83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * to work with 6-bit trail bytes from 2-byte UTF-8.
84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UNEWTRIE2_DATA_NULL_OFFSET=UTRIE2_DATA_START_OFFSET,
86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /** The start of allocated data blocks. */
88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UNEWTRIE2_DATA_START_OFFSET=UNEWTRIE2_DATA_NULL_OFFSET+0x40,
89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /**
91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * The start of data blocks for U+0800 and above.
92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Below, compaction uses a block length of 64 for 2-byte UTF-8.
93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * From here on, compaction uses UTRIE2_DATA_BLOCK_LENGTH.
94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Data values for 0x780 code points beyond ASCII.
95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UNEWTRIE2_DATA_0800_OFFSET=UNEWTRIE2_DATA_START_OFFSET+0x780
97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)};
98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Start with allocation of 16k data entries. */
100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UNEWTRIE2_INITIAL_DATA_LENGTH ((int32_t)1<<14)
101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Grow about 8x each time. */
103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UNEWTRIE2_MEDIUM_DATA_LENGTH ((int32_t)1<<17)
104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t
106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)allocIndex2Block(UNewTrie2 *trie);
107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UTrie2 * U_EXPORT2
109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) {
110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UTrie2 *trie;
111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UNewTrie2 *newTrie;
112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint32_t *data;
113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t i, j;
114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(*pErrorCode)) {
116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return NULL;
117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newTrie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2));
121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    data=(uint32_t *)uprv_malloc(UNEWTRIE2_INITIAL_DATA_LENGTH*4);
122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(trie==NULL || newTrie==NULL || data==NULL) {
123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        uprv_free(trie);
124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        uprv_free(newTrie);
125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        uprv_free(data);
126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0;
128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uprv_memset(trie, 0, sizeof(UTrie2));
131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->initialValue=initialValue;
132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->errorValue=errorValue;
133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->highStart=0x110000;
134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->newTrie=newTrie;
135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newTrie->data=data;
137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newTrie->dataCapacity=UNEWTRIE2_INITIAL_DATA_LENGTH;
138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newTrie->initialValue=initialValue;
139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newTrie->errorValue=errorValue;
140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newTrie->highStart=0x110000;
141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newTrie->firstFreeBlock=0;  /* no free block in the list */
142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newTrie->isCompacted=FALSE;
143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /*
145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * preallocate and reset
146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * - ASCII
147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * - the bad-UTF-8-data block
148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * - the null data block
149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(i=0; i<0x80; ++i) {
151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        newTrie->data[i]=initialValue;
152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(; i<0xc0; ++i) {
154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        newTrie->data[i]=errorValue;
155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(i=UNEWTRIE2_DATA_NULL_OFFSET; i<UNEWTRIE2_DATA_START_OFFSET; ++i) {
157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        newTrie->data[i]=initialValue;
158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newTrie->dataNullOffset=UNEWTRIE2_DATA_NULL_OFFSET;
160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newTrie->dataLength=UNEWTRIE2_DATA_START_OFFSET;
161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* set the index-2 indexes for the 2=0x80>>UTRIE2_SHIFT_2 ASCII data blocks */
163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(i=0, j=0; j<0x80; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        newTrie->index2[i]=j;
165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        newTrie->map[i]=1;
166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* reference counts for the bad-UTF-8-data block */
168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(; j<0xc0; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        newTrie->map[i]=0;
170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /*
172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Reference counts for the null data block: all blocks except for the ASCII blocks.
173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Plus 1 so that we don't drop this block during compaction.
174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Plus as many as needed for lead surrogate code points.
175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* i==newTrie->dataNullOffset */
177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newTrie->map[i++]=
178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        (0x110000>>UTRIE2_SHIFT_2)-
179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        (0x80>>UTRIE2_SHIFT_2)+
180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        1+
181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UTRIE2_LSCP_INDEX_2_LENGTH;
182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    j+=UTRIE2_DATA_BLOCK_LENGTH;
183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(; j<UNEWTRIE2_DATA_START_OFFSET; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        newTrie->map[i]=0;
185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /*
188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * set the remaining indexes in the BMP index-2 block
189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * to the null data block
190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(i=0x80>>UTRIE2_SHIFT_2; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        newTrie->index2[i]=UNEWTRIE2_DATA_NULL_OFFSET;
193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /*
196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Fill the index gap with impossible values so that compaction
197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * does not overlap other index-2 blocks with the gap.
198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(i=0; i<UNEWTRIE2_INDEX_GAP_LENGTH; ++i) {
200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        newTrie->index2[UNEWTRIE2_INDEX_GAP_OFFSET+i]=-1;
201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* set the indexes in the null index-2 block */
204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(i=0; i<UTRIE2_INDEX_2_BLOCK_LENGTH; ++i) {
205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        newTrie->index2[UNEWTRIE2_INDEX_2_NULL_OFFSET+i]=UNEWTRIE2_DATA_NULL_OFFSET;
206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newTrie->index2NullOffset=UNEWTRIE2_INDEX_2_NULL_OFFSET;
208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newTrie->index2Length=UNEWTRIE2_INDEX_2_START_OFFSET;
209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* set the index-1 indexes for the linear index-2 block */
211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(i=0, j=0;
212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        i<UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ++i, j+=UTRIE2_INDEX_2_BLOCK_LENGTH
214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ) {
215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        newTrie->index1[i]=j;
216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* set the remaining index-1 indexes to the null index-2 block */
219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        newTrie->index1[i]=UNEWTRIE2_INDEX_2_NULL_OFFSET;
221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /*
224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Preallocate and reset data for U+0080..U+07ff,
225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * for 2-byte UTF-8 which will be compacted in 64-blocks
226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * even if UTRIE2_DATA_BLOCK_LENGTH is smaller.
227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(i=0x80; i<0x800; i+=UTRIE2_DATA_BLOCK_LENGTH) {
229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        utrie2_set32(trie, i, initialValue, pErrorCode);
230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return trie;
233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static UNewTrie2 *
236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)cloneBuilder(const UNewTrie2 *other) {
237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UNewTrie2 *trie;
238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2));
240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(trie==NULL) {
241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return NULL;
242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->data=(uint32_t *)uprv_malloc(other->dataCapacity*4);
245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(trie->data==NULL) {
246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        uprv_free(trie);
247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return NULL;
248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->dataCapacity=other->dataCapacity;
250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* clone data */
252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uprv_memcpy(trie->index1, other->index1, sizeof(trie->index1));
253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uprv_memcpy(trie->index2, other->index2, other->index2Length*4);
254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->index2NullOffset=other->index2NullOffset;
255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->index2Length=other->index2Length;
256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uprv_memcpy(trie->data, other->data, other->dataLength*4);
258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->dataNullOffset=other->dataNullOffset;
259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->dataLength=other->dataLength;
260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* reference counters */
262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(other->isCompacted) {
263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->firstFreeBlock=0;
264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        uprv_memcpy(trie->map, other->map, (other->dataLength>>UTRIE2_SHIFT_2)*4);
266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->firstFreeBlock=other->firstFreeBlock;
267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->initialValue=other->initialValue;
270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->errorValue=other->errorValue;
271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->highStart=other->highStart;
272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->isCompacted=other->isCompacted;
273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return trie;
275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UTrie2 * U_EXPORT2
278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode) {
279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UTrie2 *trie;
280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(*pErrorCode)) {
282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return NULL;
283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) {
285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return NULL;
287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(trie==NULL) {
291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return NULL;
292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uprv_memcpy(trie, other, sizeof(UTrie2));
294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(other->memory!=NULL) {
296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->memory=uprv_malloc(other->length);
297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(trie->memory!=NULL) {
298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            trie->isMemoryOwned=TRUE;
299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            uprv_memcpy(trie->memory, other->memory, other->length);
300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* make the clone's pointers point to its own memory */
302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            trie->index=(uint16_t *)trie->memory+(other->index-(uint16_t *)other->memory);
303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(other->data16!=NULL) {
304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                trie->data16=(uint16_t *)trie->memory+(other->data16-(uint16_t *)other->memory);
305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(other->data32!=NULL) {
307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                trie->data32=(uint32_t *)trie->memory+(other->data32-(uint32_t *)other->memory);
308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else /* other->newTrie!=NULL */ {
311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->newTrie=cloneBuilder(other->newTrie);
312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(trie->memory==NULL && trie->newTrie==NULL) {
315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        uprv_free(trie);
316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie=NULL;
317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return trie;
319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef struct NewTrieAndStatus {
322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UTrie2 *trie;
323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode errorCode;
324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UBool exclusiveLimit;  /* rather than inclusive range end */
325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} NewTrieAndStatus;
326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static UBool U_CALLCONV
328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)copyEnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    NewTrieAndStatus *nt=(NewTrieAndStatus *)context;
330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(value!=nt->trie->initialValue) {
331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(nt->exclusiveLimit) {
332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            --end;
333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(start==end) {
335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            utrie2_set32(nt->trie, start, value, &nt->errorCode);
336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            utrie2_setRange32(nt->trie, start, end, value, TRUE, &nt->errorCode);
338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return U_SUCCESS(nt->errorCode);
340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return TRUE;
342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE2_DEBUG
346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void
347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_printLengths(const UTrie *trie) {
348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    long indexLength=trie->indexLength;
349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    long dataLength=(long)trie->dataLength;
350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    long totalLength=(long)sizeof(UTrieHeader)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2);
351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    printf("**UTrieLengths** index:%6ld  data:%6ld  serialized:%6ld\n",
352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           indexLength, dataLength, totalLength);
353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void
356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie2_printLengths(const UTrie2 *trie, const char *which) {
357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    long indexLength=trie->indexLength;
358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    long dataLength=(long)trie->dataLength;
359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    long totalLength=(long)sizeof(UTrie2Header)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2);
360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    printf("**UTrie2Lengths(%s)** index:%6ld  data:%6ld  serialized:%6ld\n",
361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)           which, indexLength, dataLength, totalLength);
362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UTrie2 * U_EXPORT2
366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode) {
367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    NewTrieAndStatus context;
368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar lead;
369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(*pErrorCode)) {
371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return NULL;
372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) {
374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return NULL;
376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(other->newTrie!=NULL && !other->newTrie->isCompacted) {
378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return utrie2_clone(other, pErrorCode);  /* clone an unfrozen trie */
379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* Clone the frozen trie by enumerating it and building a new one. */
382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    context.trie=utrie2_open(other->initialValue, other->errorValue, pErrorCode);
383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(*pErrorCode)) {
384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return NULL;
385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    context.exclusiveLimit=FALSE;
387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    context.errorCode=*pErrorCode;
388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    utrie2_enum(other, NULL, copyEnumRange, &context);
389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    *pErrorCode=context.errorCode;
390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(lead=0xd800; lead<0xdc00; ++lead) {
391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        uint32_t value;
392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(other->data32==NULL) {
393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            value=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(other, lead);
394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            value=UTRIE2_GET32_FROM_U16_SINGLE_LEAD(other, lead);
396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(value!=other->initialValue) {
398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(*pErrorCode)) {
402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        utrie2_close(context.trie);
403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        context.trie=NULL;
404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return context.trie;
406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Almost the same as utrie2_cloneAsThawed() but copies a UTrie and freezes the clone. */
409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UTrie2 * U_EXPORT2
410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode) {
411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    NewTrieAndStatus context;
412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar lead;
413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(*pErrorCode)) {
415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return NULL;
416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(trie1==NULL) {
418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return NULL;
420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    context.trie=utrie2_open(trie1->initialValue, errorValue, pErrorCode);
422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(*pErrorCode)) {
423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return NULL;
424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    context.exclusiveLimit=TRUE;
426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    context.errorCode=*pErrorCode;
427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    utrie_enum(trie1, NULL, copyEnumRange, &context);
428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    *pErrorCode=context.errorCode;
429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(lead=0xd800; lead<0xdc00; ++lead) {
430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        uint32_t value;
431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(trie1->data32==NULL) {
432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            value=UTRIE_GET16_FROM_LEAD(trie1, lead);
433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            value=UTRIE_GET32_FROM_LEAD(trie1, lead);
435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(value!=trie1->initialValue) {
437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_SUCCESS(*pErrorCode)) {
441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        utrie2_freeze(context.trie,
442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                      trie1->data32!=NULL ? UTRIE2_32_VALUE_BITS : UTRIE2_16_VALUE_BITS,
443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                      pErrorCode);
444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE2_DEBUG
446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_SUCCESS(*pErrorCode)) {
447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        utrie_printLengths(trie1);
448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        utrie2_printLengths(context.trie, "fromUTrie");
449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(*pErrorCode)) {
452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        utrie2_close(context.trie);
453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        context.trie=NULL;
454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return context.trie;
456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static U_INLINE UBool
459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)isInNullBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t i2, block;
461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_IS_LEAD(c) && forLSCP) {
463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+
464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (c>>UTRIE2_SHIFT_2);
465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        i2=trie->index1[c>>UTRIE2_SHIFT_1]+
467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);
468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    block=trie->index2[i2];
470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return (UBool)(block==trie->dataNullOffset);
471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t
474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)allocIndex2Block(UNewTrie2 *trie) {
475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t newBlock, newTop;
476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newBlock=trie->index2Length;
478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newTop=newBlock+UTRIE2_INDEX_2_BLOCK_LENGTH;
479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(newTop>LENGTHOF(trie->index2)) {
480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /*
481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         * Should never occur.
482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         * Either UTRIE2_MAX_BUILD_TIME_INDEX_LENGTH is incorrect,
483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         * or the code writes more values than should be possible.
484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         */
485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return -1;
486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->index2Length=newTop;
488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uprv_memcpy(trie->index2+newBlock, trie->index2+trie->index2NullOffset, UTRIE2_INDEX_2_BLOCK_LENGTH*4);
489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return newBlock;
490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t
493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)getIndex2Block(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t i1, i2;
495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_IS_LEAD(c) && forLSCP) {
497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return UTRIE2_LSCP_INDEX_2_OFFSET;
498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    i1=c>>UTRIE2_SHIFT_1;
501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    i2=trie->index1[i1];
502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(i2==trie->index2NullOffset) {
503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        i2=allocIndex2Block(trie);
504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(i2<0) {
505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return -1;  /* program error */
506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->index1[i1]=i2;
508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return i2;
510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t
513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)allocDataBlock(UNewTrie2 *trie, int32_t copyBlock) {
514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t newBlock, newTop;
515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(trie->firstFreeBlock!=0) {
517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* get the first free block */
518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        newBlock=trie->firstFreeBlock;
519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->firstFreeBlock=-trie->map[newBlock>>UTRIE2_SHIFT_2];
520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* get a new block from the high end */
522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        newBlock=trie->dataLength;
523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        newTop=newBlock+UTRIE2_DATA_BLOCK_LENGTH;
524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(newTop>trie->dataCapacity) {
525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* out of memory in the data array */
526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            int32_t capacity;
527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            uint32_t *data;
528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(trie->dataCapacity<UNEWTRIE2_MEDIUM_DATA_LENGTH) {
530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                capacity=UNEWTRIE2_MEDIUM_DATA_LENGTH;
531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } else if(trie->dataCapacity<UNEWTRIE2_MAX_DATA_LENGTH) {
532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                capacity=UNEWTRIE2_MAX_DATA_LENGTH;
533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } else {
534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                /*
535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                 * Should never occur.
536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                 * Either UNEWTRIE2_MAX_DATA_LENGTH is incorrect,
537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                 * or the code writes more values than should be possible.
538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                 */
539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                return -1;
540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            data=(uint32_t *)uprv_malloc(capacity*4);
542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(data==NULL) {
543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                return -1;
544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            uprv_memcpy(data, trie->data, trie->dataLength*4);
546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            uprv_free(trie->data);
547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            trie->data=data;
548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            trie->dataCapacity=capacity;
549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->dataLength=newTop;
551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uprv_memcpy(trie->data+newBlock, trie->data+copyBlock, UTRIE2_DATA_BLOCK_LENGTH*4);
553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->map[newBlock>>UTRIE2_SHIFT_2]=0;
554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return newBlock;
555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* call when the block's reference counter reaches 0 */
558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void
559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)releaseDataBlock(UNewTrie2 *trie, int32_t block) {
560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* put this block at the front of the free-block chain */
561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->map[block>>UTRIE2_SHIFT_2]=-trie->firstFreeBlock;
562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->firstFreeBlock=block;
563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static U_INLINE UBool
566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)isWritableBlock(UNewTrie2 *trie, int32_t block) {
567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return (UBool)(block!=trie->dataNullOffset && 1==trie->map[block>>UTRIE2_SHIFT_2]);
568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static U_INLINE void
571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)setIndex2Entry(UNewTrie2 *trie, int32_t i2, int32_t block) {
572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t oldBlock;
573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ++trie->map[block>>UTRIE2_SHIFT_2];  /* increment first, in case block==oldBlock! */
574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    oldBlock=trie->index2[i2];
575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(0 == --trie->map[oldBlock>>UTRIE2_SHIFT_2]) {
576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        releaseDataBlock(trie, oldBlock);
577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->index2[i2]=block;
579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * No error checking for illegal arguments.
583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return -1 if no new data block available (out of memory in data array)
585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @internal
586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t
588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)getDataBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t i2, oldBlock, newBlock;
590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    i2=getIndex2Block(trie, c, forLSCP);
592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(i2<0) {
593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return -1;  /* program error */
594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    i2+=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    oldBlock=trie->index2[i2];
598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(isWritableBlock(trie, oldBlock)) {
599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return oldBlock;
600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* allocate a new data block */
603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newBlock=allocDataBlock(trie, oldBlock);
604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(newBlock<0) {
605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* out of memory in the data array */
606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return -1;
607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    setIndex2Entry(trie, i2, newBlock);
609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return newBlock;
610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return TRUE if the value was successfully set
614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void
616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)set32(UNewTrie2 *trie,
617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      UChar32 c, UBool forLSCP, uint32_t value,
618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)      UErrorCode *pErrorCode) {
619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t block;
620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(trie==NULL || trie->isCompacted) {
622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_NO_WRITE_PERMISSION;
623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    block=getDataBlock(trie, c, forLSCP);
627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(block<0) {
628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->data[block+(c&UTRIE2_DATA_MASK)]=value;
633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2
636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) {
637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(*pErrorCode)) {
638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if((uint32_t)c>0x10ffff) {
641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set32(trie->newTrie, c, TRUE, value, pErrorCode);
645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2
648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie,
649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                     UChar32 c, uint32_t value,
650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                                     UErrorCode *pErrorCode) {
651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(*pErrorCode)) {
652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(!U_IS_LEAD(c)) {
655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    set32(trie->newTrie, c, FALSE, value, pErrorCode);
659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void
662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)writeBlock(uint32_t *block, uint32_t value) {
663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint32_t *limit=block+UTRIE2_DATA_BLOCK_LENGTH;
664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while(block<limit) {
665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *block++=value;
666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * initialValue is ignored if overwrite=TRUE
671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @internal
672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void
674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)fillBlock(uint32_t *block, UChar32 start, UChar32 limit,
675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)          uint32_t value, uint32_t initialValue, UBool overwrite) {
676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint32_t *pLimit;
677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    pLimit=block+limit;
679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    block+=start;
680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(overwrite) {
681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while(block<pLimit) {
682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *block++=value;
683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        while(block<pLimit) {
686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(*block==initialValue) {
687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                *block=value;
688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            ++block;
690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2
695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie2_setRange32(UTrie2 *trie,
696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  UChar32 start, UChar32 end,
697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  uint32_t value, UBool overwrite,
698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                  UErrorCode *pErrorCode) {
699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /*
700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * repeat value in [start..end]
701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * mark index values for repeat-data blocks by setting bit 31 of the index values
702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * fill around existing values if any, if(overwrite)
703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UNewTrie2 *newTrie;
705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t block, rest, repeatBlock;
706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32 limit;
707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(*pErrorCode)) {
709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if((uint32_t)start>0x10ffff || (uint32_t)end>0x10ffff || start>end) {
712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newTrie=trie->newTrie;
716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(newTrie==NULL || newTrie->isCompacted) {
717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_NO_WRITE_PERMISSION;
718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(!overwrite && value==newTrie->initialValue) {
721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return; /* nothing to do */
722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    limit=end+1;
725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(start&UTRIE2_DATA_MASK) {
726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 nextStart;
727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* set partial block at [start..following block boundary[ */
729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        block=getDataBlock(newTrie, start, TRUE);
730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(block<0) {
731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return;
733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        nextStart=(start+UTRIE2_DATA_BLOCK_LENGTH)&~UTRIE2_DATA_MASK;
736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(nextStart<=limit) {
737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, UTRIE2_DATA_BLOCK_LENGTH,
738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                      value, newTrie->initialValue, overwrite);
739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            start=nextStart;
740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, limit&UTRIE2_DATA_MASK,
742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                      value, newTrie->initialValue, overwrite);
743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return;
744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* number of positions in the last, partial block */
748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    rest=limit&UTRIE2_DATA_MASK;
749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* round down limit to a block boundary */
751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    limit&=~UTRIE2_DATA_MASK;
752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* iterate over all-value blocks */
754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(value==newTrie->initialValue) {
755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        repeatBlock=newTrie->dataNullOffset;
756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        repeatBlock=-1;
758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while(start<limit) {
761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t i2;
762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UBool setRepeatBlock=FALSE;
763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(value==newTrie->initialValue && isInNullBlock(newTrie, start, TRUE)) {
765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            start+=UTRIE2_DATA_BLOCK_LENGTH; /* nothing to do */
766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* get index value */
770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        i2=getIndex2Block(newTrie, start, TRUE);
771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(i2<0) {
772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return;
774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        i2+=(start>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        block=newTrie->index2[i2];
777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(isWritableBlock(newTrie, block)) {
778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* already allocated */
779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(overwrite && block>=UNEWTRIE2_DATA_0800_OFFSET) {
780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                /*
781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                 * We overwrite all values, and it's not a
782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                 * protected (ASCII-linear or 2-byte UTF-8) block:
783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                 * replace with the repeatBlock.
784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                 */
785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                setRepeatBlock=TRUE;
786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } else {
787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                /* !overwrite, or protected block: just write the values into this block */
788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                fillBlock(newTrie->data+block,
789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                          0, UTRIE2_DATA_BLOCK_LENGTH,
790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                          value, newTrie->initialValue, overwrite);
791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else if(newTrie->data[block]!=value && (overwrite || block==newTrie->dataNullOffset)) {
793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /*
794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             * Set the repeatBlock instead of the null block or previous repeat block:
795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             *
796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             * If !isWritableBlock() then all entries in the block have the same value
797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             * because it's the null block or a range block (the repeatBlock from a previous
798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             * call to utrie2_setRange32()).
799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             * No other blocks are used multiple times before compacting.
800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             *
801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             * The null block is the only non-writable block with the initialValue because
802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             * of the repeatBlock initialization above. (If value==initialValue, then
803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             * the repeatBlock will be the null data block.)
804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             *
805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             * We set our repeatBlock if the desired value differs from the block's value,
806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             * and if we overwrite any data or if the data is all initial values
807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             * (which is the same as the block being the null block, see above).
808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             */
809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            setRepeatBlock=TRUE;
810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(setRepeatBlock) {
812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(repeatBlock>=0) {
813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                setIndex2Entry(newTrie, i2, repeatBlock);
814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } else {
815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                /* create and set and fill the repeatBlock */
816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                repeatBlock=getDataBlock(newTrie, start, TRUE);
817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(repeatBlock<0) {
818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    return;
820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                writeBlock(newTrie->data+repeatBlock, value);
822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        start+=UTRIE2_DATA_BLOCK_LENGTH;
826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(rest>0) {
829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* set partial block at [last block boundary..limit[ */
830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        block=getDataBlock(newTrie, start, TRUE);
831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(block<0) {
832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return;
834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fillBlock(newTrie->data+block, 0, rest, value, newTrie->initialValue, overwrite);
837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return;
840f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
841f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
842f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* compaction --------------------------------------------------------------- */
843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static U_INLINE UBool
845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)equal_int32(const int32_t *s, const int32_t *t, int32_t length) {
846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while(length>0 && *s==*t) {
847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ++s;
848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ++t;
849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        --length;
850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return (UBool)(length==0);
852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static U_INLINE UBool
855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) {
856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while(length>0 && *s==*t) {
857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ++s;
858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ++t;
859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        --length;
860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return (UBool)(length==0);
862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t
865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)findSameIndex2Block(const int32_t *idx, int32_t index2Length, int32_t otherBlock) {
866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t block;
867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* ensure that we do not even partially get past index2Length */
869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    index2Length-=UTRIE2_INDEX_2_BLOCK_LENGTH;
870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
871f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(block=0; block<=index2Length; ++block) {
872f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(equal_int32(idx+block, idx+otherBlock, UTRIE2_INDEX_2_BLOCK_LENGTH)) {
873f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return block;
874f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
875f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
876f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return -1;
877f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
878f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
879f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t
880f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)findSameDataBlock(const uint32_t *data, int32_t dataLength, int32_t otherBlock, int32_t blockLength) {
881f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t block;
882f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
883f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* ensure that we do not even partially get past dataLength */
884f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    dataLength-=blockLength;
885f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
886f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(block=0; block<=dataLength; block+=UTRIE2_DATA_GRANULARITY) {
887f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(equal_uint32(data+block, data+otherBlock, blockLength)) {
888f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return block;
889f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
890f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
891f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return -1;
892f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
893f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
894f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
895f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Find the start of the last range in the trie by enumerating backward.
896f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Indexes for supplementary code points higher than this will be omitted.
897f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
898f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static UChar32
899f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)findHighStart(UNewTrie2 *trie, uint32_t highValue) {
900f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const uint32_t *data32;
901f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
902f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint32_t value, initialValue;
903f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32 c, prev;
904f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t i1, i2, j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock;
905f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
906f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    data32=trie->data;
907f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    initialValue=trie->initialValue;
908f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
909f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    index2NullOffset=trie->index2NullOffset;
910f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    nullBlock=trie->dataNullOffset;
911f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
912f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* set variables for previous range */
913f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(highValue==initialValue) {
914f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        prevI2Block=index2NullOffset;
915f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        prevBlock=nullBlock;
916f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
917f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        prevI2Block=-1;
918f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        prevBlock=-1;
919f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
920f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    prev=0x110000;
921f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
922f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* enumerate index-2 blocks */
923f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    i1=UNEWTRIE2_INDEX_1_LENGTH;
924f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    c=prev;
925f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while(c>0) {
926f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        i2Block=trie->index1[--i1];
927f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(i2Block==prevI2Block) {
928f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* the index-2 block is the same as the previous one, and filled with highValue */
929f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
930f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
931f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
932f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        prevI2Block=i2Block;
933f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(i2Block==index2NullOffset) {
934f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* this is the null index-2 block */
935f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(highValue!=initialValue) {
936f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                return c;
937f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
938f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
939f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
940f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* enumerate data blocks for one index-2 block */
941f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            for(i2=UTRIE2_INDEX_2_BLOCK_LENGTH; i2>0;) {
942f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                block=trie->index2[i2Block+ --i2];
943f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(block==prevBlock) {
944f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    /* the block is the same as the previous one, and filled with highValue */
945f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    c-=UTRIE2_DATA_BLOCK_LENGTH;
946f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    continue;
947f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
948f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                prevBlock=block;
949f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                if(block==nullBlock) {
950f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    /* this is the null data block */
951f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    if(highValue!=initialValue) {
952f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        return c;
953f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
954f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    c-=UTRIE2_DATA_BLOCK_LENGTH;
955f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                } else {
956f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    for(j=UTRIE2_DATA_BLOCK_LENGTH; j>0;) {
957f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        value=data32[block+ --j];
958f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        if(value!=highValue) {
959f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                            return c;
960f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        }
961f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        --c;
962f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    }
963f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                }
964f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
965f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
966f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
967f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
968f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* deliver last range */
969f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return 0;
970f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
971f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
972f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
973f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Compact a build-time trie.
974f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
975f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The compaction
976f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - removes blocks that are identical with earlier ones
977f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - overlaps adjacent blocks as much as possible (if overlap==TRUE)
978f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - moves blocks in steps of the data granularity
979f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - moves and overlaps blocks that overlap with multiple values in the overlap region
980f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
981f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * It does not
982f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - try to move and overlap blocks that are not already adjacent
983f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
984f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void
985f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)compactData(UNewTrie2 *trie) {
986f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t start, newStart, movedStart;
987f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t blockLength, overlap;
988f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t i, mapIndex, blockCount;
989f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
990f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* do not compact linear-ASCII data */
991f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newStart=UTRIE2_DATA_START_OFFSET;
992f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(start=0, i=0; start<newStart; start+=UTRIE2_DATA_BLOCK_LENGTH, ++i) {
993f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->map[i]=start;
994f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
995f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
996f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /*
997f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Start with a block length of 64 for 2-byte UTF-8,
998f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * then switch to UTRIE2_DATA_BLOCK_LENGTH.
999f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
1000f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    blockLength=64;
1001f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    blockCount=blockLength>>UTRIE2_SHIFT_2;
1002f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(start=newStart; start<trie->dataLength;) {
1003f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /*
1004f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         * start: index of first entry of current block
1005f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         * newStart: index where the current block is to be moved
1006f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         *           (right after current end of already-compacted data)
1007f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         */
1008f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(start==UNEWTRIE2_DATA_0800_OFFSET) {
1009f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            blockLength=UTRIE2_DATA_BLOCK_LENGTH;
1010f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            blockCount=1;
1011f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1012f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1013f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* skip blocks that are not used */
1014f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(trie->map[start>>UTRIE2_SHIFT_2]<=0) {
1015f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* advance start to the next block */
1016f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            start+=blockLength;
1017f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1018f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* leave newStart with the previous block! */
1019f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
1020f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1021f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1022f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* search for an identical block */
1023f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if( (movedStart=findSameDataBlock(trie->data, newStart, start, blockLength))
1024f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             >=0
1025f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ) {
1026f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* found an identical block, set the other block's index value for the current block */
1027f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
1028f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                trie->map[mapIndex++]=movedStart;
1029f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
1030f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1031f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1032f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* advance start to the next block */
1033f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            start+=blockLength;
1034f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1035f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* leave newStart with the previous block! */
1036f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
1037f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1038f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1039f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* see if the beginning of this block can be overlapped with the end of the previous block */
1040f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
1041f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for(overlap=blockLength-UTRIE2_DATA_GRANULARITY;
1042f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            overlap>0 && !equal_uint32(trie->data+(newStart-overlap), trie->data+start, overlap);
1043f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            overlap-=UTRIE2_DATA_GRANULARITY) {}
1044f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1045f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(overlap>0 || newStart<start) {
1046f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* some overlap, or just move the whole block */
1047f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            movedStart=newStart-overlap;
1048f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
1049f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                trie->map[mapIndex++]=movedStart;
1050f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
1051f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1052f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1053f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* move the non-overlapping indexes to their new positions */
1054f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            start+=overlap;
1055f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            for(i=blockLength-overlap; i>0; --i) {
1056f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                trie->data[newStart++]=trie->data[start++];
1057f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1058f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else /* no overlap && newStart==start */ {
1059f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
1060f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                trie->map[mapIndex++]=start;
1061f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                start+=UTRIE2_DATA_BLOCK_LENGTH;
1062f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1063f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            newStart=start;
1064f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1065f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1066f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1067f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* now adjust the index-2 table */
1068f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(i=0; i<trie->index2Length; ++i) {
1069f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(i==UNEWTRIE2_INDEX_GAP_OFFSET) {
1070f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* Gap indexes are invalid (-1). Skip over the gap. */
1071f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            i+=UNEWTRIE2_INDEX_GAP_LENGTH;
1072f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1073f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->index2[i]=trie->map[trie->index2[i]>>UTRIE2_SHIFT_2];
1074f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1075f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->dataNullOffset=trie->map[trie->dataNullOffset>>UTRIE2_SHIFT_2];
1076f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1077f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* ensure dataLength alignment */
1078f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while((newStart&(UTRIE2_DATA_GRANULARITY-1))!=0) {
1079f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->data[newStart++]=trie->initialValue;
1080f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1081f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1082f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE2_DEBUG
1083f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* we saved some space */
1084f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    printf("compacting UTrie2: count of 32-bit data words %lu->%lu\n",
1085f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (long)trie->dataLength, (long)newStart);
1086f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
1087f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1088f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->dataLength=newStart;
1089f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1090f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1091f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void
1092f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)compactIndex2(UNewTrie2 *trie) {
1093f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t i, start, newStart, movedStart, overlap;
1094f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1095f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* do not compact linear-BMP index-2 blocks */
1096f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newStart=UTRIE2_INDEX_2_BMP_LENGTH;
1097f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(start=0, i=0; start<newStart; start+=UTRIE2_INDEX_2_BLOCK_LENGTH, ++i) {
1098f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->map[i]=start;
1099f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* Reduce the index table gap to what will be needed at runtime. */
1102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newStart+=UTRIE2_UTF8_2B_INDEX_2_LENGTH+((trie->highStart-0x10000)>>UTRIE2_SHIFT_1);
1103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(start=UNEWTRIE2_INDEX_2_NULL_OFFSET; start<trie->index2Length;) {
1105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /*
1106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         * start: index of first entry of current block
1107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         * newStart: index where the current block is to be moved
1108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         *           (right after current end of already-compacted data)
1109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         */
1110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* search for an identical block */
1112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if( (movedStart=findSameIndex2Block(trie->index2, newStart, start))
1113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)             >=0
1114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        ) {
1115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* found an identical block, set the other block's index value for the current block */
1116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            trie->map[start>>UTRIE2_SHIFT_1_2]=movedStart;
1117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* advance start to the next block */
1119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
1120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* leave newStart with the previous block! */
1122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            continue;
1123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* see if the beginning of this block can be overlapped with the end of the previous block */
1126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* look for maximum overlap with the previous, adjacent block */
1127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for(overlap=UTRIE2_INDEX_2_BLOCK_LENGTH-1;
1128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            overlap>0 && !equal_int32(trie->index2+(newStart-overlap), trie->index2+start, overlap);
1129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            --overlap) {}
1130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(overlap>0 || newStart<start) {
1132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* some overlap, or just move the whole block */
1133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            trie->map[start>>UTRIE2_SHIFT_1_2]=newStart-overlap;
1134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            /* move the non-overlapping indexes to their new positions */
1136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            start+=overlap;
1137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            for(i=UTRIE2_INDEX_2_BLOCK_LENGTH-overlap; i>0; --i) {
1138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                trie->index2[newStart++]=trie->index2[start++];
1139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
1140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else /* no overlap && newStart==start */ {
1141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            trie->map[start>>UTRIE2_SHIFT_1_2]=start;
1142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
1143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            newStart=start;
1144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* now adjust the index-1 table */
1148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(i=0; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
1149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->index1[i]=trie->map[trie->index1[i]>>UTRIE2_SHIFT_1_2];
1150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->index2NullOffset=trie->map[trie->index2NullOffset>>UTRIE2_SHIFT_1_2];
1152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /*
1154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Ensure data table alignment:
1155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Needs to be granularity-aligned for 16-bit trie
1156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * (so that dataMove will be down-shiftable),
1157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * and 2-aligned for uint32_t data.
1158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
1159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while((newStart&((UTRIE2_DATA_GRANULARITY-1)|1))!=0) {
1160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* Arbitrary value: 0x3fffc not possible for real data. */
1161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->index2[newStart++]=(int32_t)0xffff<<UTRIE2_INDEX_SHIFT;
1162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE2_DEBUG
1165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* we saved some space */
1166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    printf("compacting UTrie2: count of 16-bit index-2 words %lu->%lu\n",
1167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (long)trie->index2Length, (long)newStart);
1168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
1169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->index2Length=newStart;
1171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void
1174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)compactTrie(UTrie2 *trie, UErrorCode *pErrorCode) {
1175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UNewTrie2 *newTrie;
1176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32 highStart, suppHighStart;
1177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint32_t highValue;
1178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newTrie=trie->newTrie;
1180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* find highStart and round it up */
1182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    highValue=utrie2_get32(trie, 0x10ffff);
1183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    highStart=findHighStart(newTrie, highValue);
1184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    highStart=(highStart+(UTRIE2_CP_PER_INDEX_1_ENTRY-1))&~(UTRIE2_CP_PER_INDEX_1_ENTRY-1);
1185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(highStart==0x110000) {
1186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        highValue=trie->errorValue;
1187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /*
1190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Set trie->highStart only after utrie2_get32(trie, highStart).
1191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Otherwise utrie2_get32(trie, highStart) would try to read the highValue.
1192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
1193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->highStart=newTrie->highStart=highStart;
1194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE2_DEBUG
1196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    printf("UTrie2: highStart U+%04lx  highValue 0x%lx  initialValue 0x%lx\n",
1197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            (long)highStart, (long)highValue, (long)trie->initialValue);
1198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
1199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(highStart<0x110000) {
1201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* Blank out [highStart..10ffff] to release associated data blocks. */
1202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        suppHighStart= highStart<=0x10000 ? 0x10000 : highStart;
1203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        utrie2_setRange32(trie, suppHighStart, 0x10ffff, trie->initialValue, TRUE, pErrorCode);
1204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(U_FAILURE(*pErrorCode)) {
1205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return;
1206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    compactData(newTrie);
1210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(highStart>0x10000) {
1211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        compactIndex2(newTrie);
1212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE2_DEBUG
1213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
1214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        printf("UTrie2: highStart U+%04lx  count of 16-bit index-2 words %lu->%lu\n",
1215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                (long)highStart, (long)trie->newTrie->index2Length, (long)UTRIE2_INDEX_1_OFFSET);
1216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif
1217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /*
1220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Store the highValue in the data array and round up the dataLength.
1221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * Must be done after compactData() because that assumes that dataLength
1222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     * is a multiple of UTRIE2_DATA_BLOCK_LENGTH.
1223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)     */
1224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newTrie->data[newTrie->dataLength++]=highValue;
1225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while((newTrie->dataLength&(UTRIE2_DATA_GRANULARITY-1))!=0) {
1226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        newTrie->data[newTrie->dataLength++]=trie->initialValue;
1227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newTrie->isCompacted=TRUE;
1230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* serialization ------------------------------------------------------------ */
1233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
1235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Maximum length of the runtime index array.
1236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Limited by its own 16-bit index values, and by uint16_t UTrie2Header.indexLength.
1237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * (The actual maximum length is lower,
1238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * (0x110000>>UTRIE2_SHIFT_2)+UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH.)
1239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
1240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UTRIE2_MAX_INDEX_LENGTH 0xffff
1241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
1243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Maximum length of the runtime data array.
1244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Limited by 16-bit index values that are left-shifted by UTRIE2_INDEX_SHIFT,
1245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and by uint16_t UTrie2Header.shiftedDataLength.
1246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
1247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UTRIE2_MAX_DATA_LENGTH (0xffff<<UTRIE2_INDEX_SHIFT)
1248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Compact and internally serialize the trie. */
1250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2
1251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode) {
1252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UNewTrie2 *newTrie;
1253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UTrie2Header *header;
1254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint32_t *p;
1255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uint16_t *dest16;
1256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t i, length;
1257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t allIndexesLength;
1258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    int32_t dataMove;  /* >0 if the data is moved to the end of the index array */
1259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UChar32 highStart;
1260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* argument check */
1262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(*pErrorCode)) {
1263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
1264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if( trie==NULL ||
1266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits
1267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ) {
1268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
1270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    newTrie=trie->newTrie;
1272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(newTrie==NULL) {
1273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* already frozen */
1274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UTrie2ValueBits frozenValueBits=
1275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            trie->data16!=NULL ? UTRIE2_16_VALUE_BITS : UTRIE2_32_VALUE_BITS;
1276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(valueBits!=frozenValueBits) {
1277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
1280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* compact if necessary */
1283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(!newTrie->isCompacted) {
1284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        compactTrie(trie, pErrorCode);
1285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(U_FAILURE(*pErrorCode)) {
1286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return;
1287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    highStart=trie->highStart;
1290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(highStart<=0x10000) {
1292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        allIndexesLength=UTRIE2_INDEX_1_OFFSET;
1293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
1294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        allIndexesLength=newTrie->index2Length;
1295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(valueBits==UTRIE2_16_VALUE_BITS) {
1297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        dataMove=allIndexesLength;
1298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
1299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        dataMove=0;
1300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* are indexLength and dataLength within limits? */
1303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if( /* for unshifted indexLength */
1304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        allIndexesLength>UTRIE2_MAX_INDEX_LENGTH ||
1305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* for unshifted dataNullOffset */
1306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        (dataMove+newTrie->dataNullOffset)>0xffff ||
1307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* for unshifted 2-byte UTF-8 index-2 values */
1308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        (dataMove+UNEWTRIE2_DATA_0800_OFFSET)>0xffff ||
1309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* for shiftedDataLength */
1310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        (dataMove+newTrie->dataLength)>UTRIE2_MAX_DATA_LENGTH
1311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ) {
1312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
1314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* calculate the total serialized length */
1317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    length=sizeof(UTrie2Header)+allIndexesLength*2;
1318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(valueBits==UTRIE2_16_VALUE_BITS) {
1319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        length+=newTrie->dataLength*2;
1320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
1321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        length+=newTrie->dataLength*4;
1322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->memory=uprv_malloc(length);
1325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(trie->memory==NULL) {
1326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
1328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->length=length;
1330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->isMemoryOwned=TRUE;
1331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->indexLength=allIndexesLength;
1333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->dataLength=newTrie->dataLength;
1334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(highStart<=0x10000) {
1335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->index2NullOffset=0xffff;
1336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
1337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET+newTrie->index2NullOffset;
1338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->dataNullOffset=(uint16_t)(dataMove+newTrie->dataNullOffset);
1340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->highValueIndex=dataMove+trie->dataLength-UTRIE2_DATA_GRANULARITY;
1341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* set the header fields */
1343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    header=(UTrie2Header *)trie->memory;
1344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    header->signature=UTRIE2_SIG; /* "Tri2" */
1346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    header->options=(uint16_t)valueBits;
1347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    header->indexLength=(uint16_t)trie->indexLength;
1349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    header->shiftedDataLength=(uint16_t)(trie->dataLength>>UTRIE2_INDEX_SHIFT);
1350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    header->index2NullOffset=trie->index2NullOffset;
1351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    header->dataNullOffset=trie->dataNullOffset;
1352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    header->shiftedHighStart=(uint16_t)(highStart>>UTRIE2_SHIFT_1);
1353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* fill the index and data arrays */
1355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    dest16=(uint16_t *)(header+1);
1356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->index=dest16;
1357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove */
1359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    p=(uint32_t *)newTrie->index2;
1360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(i=UTRIE2_INDEX_2_BMP_LENGTH; i>0; --i) {
1361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);
1362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* write UTF-8 2-byte index-2 values, not right-shifted */
1365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(i=0; i<(0xc2-0xc0); ++i) {                                  /* C0..C1 */
1366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET);
1367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    for(; i<(0xe0-0xc0); ++i) {                                     /* C2..DF */
1369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *dest16++=(uint16_t)(dataMove+newTrie->index2[i<<(6-UTRIE2_SHIFT_2)]);
1370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(highStart>0x10000) {
1373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t index1Length=(highStart-0x10000)>>UTRIE2_SHIFT_1;
1374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        int32_t index2Offset=UTRIE2_INDEX_2_BMP_LENGTH+UTRIE2_UTF8_2B_INDEX_2_LENGTH+index1Length;
1375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* write 16-bit index-1 values for supplementary code points */
1377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        p=(uint32_t *)newTrie->index1+UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
1378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for(i=index1Length; i>0; --i) {
1379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *dest16++=(uint16_t)(UTRIE2_INDEX_2_OFFSET + *p++);
1380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /*
1383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         * write the index-2 array values for supplementary code points,
1384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         * shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove
1385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)         */
1386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        p=(uint32_t *)newTrie->index2+index2Offset;
1387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for(i=newTrie->index2Length-index2Offset; i>0; --i) {
1388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);
1389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* write the 16/32-bit data array */
1393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    switch(valueBits) {
1394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    case UTRIE2_16_VALUE_BITS:
1395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* write 16-bit data values */
1396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->data16=dest16;
1397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->data32=NULL;
1398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        p=newTrie->data;
1399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        for(i=newTrie->dataLength; i>0; --i) {
1400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *dest16++=(uint16_t)*p++;
1401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        break;
1403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    case UTRIE2_32_VALUE_BITS:
1404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        /* write 32-bit data values */
1405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->data16=NULL;
1406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        trie->data32=(uint32_t *)dest16;
1407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        uprv_memcpy(dest16, newTrie->data, newTrie->dataLength*4);
1408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        break;
1409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    default:
1410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
1412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* Delete the UNewTrie2. */
1415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uprv_free(newTrie->data);
1416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    uprv_free(newTrie);
1417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    trie->newTrie=NULL;
1418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UBool U_EXPORT2
1421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie2_isFrozen(const UTrie2 *trie) {
1422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return (UBool)(trie->newTrie==NULL);
1423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2
1426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie2_serialize(UTrie2 *trie,
1427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                 void *data, int32_t capacity,
1428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                 UErrorCode *pErrorCode) {
1429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    /* argument check */
1430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(*pErrorCode)) {
1431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0;
1432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if( trie==NULL || trie->memory==NULL || trie->newTrie!=NULL ||
1435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        capacity<0 || (capacity>0 && (data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)))
1436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    ) {
1437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return 0;
1439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(capacity>=trie->length) {
1442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        uprv_memcpy(data, trie->memory, trie->length);
1443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
1444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return trie->length;
1447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
1449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
1450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * This is here to avoid a dependency from utrie2.cpp on utrie.c.
1451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * This file already depends on utrie.c.
1452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Otherwise, this should be in utrie2.cpp right after utrie2_swap().
1453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
1454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2
1455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie2_swapAnyVersion(const UDataSwapper *ds,
1456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                      const void *inData, int32_t length, void *outData,
1457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                      UErrorCode *pErrorCode) {
1458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_SUCCESS(*pErrorCode)) {
1459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        switch(utrie2_getVersion(inData, length, TRUE)) {
1460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 1:
1461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return utrie_swap(ds, inData, length, outData, pErrorCode);
1462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        case 2:
1463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return utrie2_swap(ds, inData, length, outData, pErrorCode);
1464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        default:
1465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            *pErrorCode=U_INVALID_FORMAT_ERROR;
1466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return 0;
1467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
1468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
1469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return 0;
1470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
1471