1/*
2******************************************************************************
3*
4*   Copyright (C) 2001-2011, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7******************************************************************************
8*   file name:  utrie2_builder.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2008sep26 (split off from utrie2.c)
14*   created by: Markus W. Scherer
15*
16*   This is a common implementation of a Unicode trie.
17*   It is a kind of compressed, serializable table of 16- or 32-bit values associated with
18*   Unicode code points (0..0x10ffff).
19*   This is the second common version of a Unicode trie (hence the name UTrie2).
20*   See utrie2.h for a comparison.
21*
22*   This file contains only the builder code.
23*   See utrie2.c for the runtime and enumeration code.
24*/
25#ifdef UTRIE2_DEBUG
26#   include <stdio.h>
27#endif
28
29#include "unicode/utypes.h"
30#include "cmemory.h"
31#include "utrie2.h"
32#include "utrie2_impl.h"
33
34#include "utrie.h" /* for utrie2_fromUTrie() and utrie_swap() */
35
36#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
37
38/* Implementation notes ----------------------------------------------------- */
39
40/*
41 * The UTRIE2_SHIFT_1, UTRIE2_SHIFT_2, UTRIE2_INDEX_SHIFT and other values
42 * have been chosen to minimize trie sizes overall.
43 * Most of the code is flexible enough to work with a range of values,
44 * within certain limits.
45 *
46 * Exception: Support for separate values for lead surrogate code _units_
47 * vs. code _points_ was added after the constants were fixed,
48 * and has not been tested nor particularly designed for different constant values.
49 * (Especially the utrie2_enum() code that jumps to the special LSCP index-2
50 * part and back.)
51 *
52 * Requires UTRIE2_SHIFT_2<=6. Otherwise 0xc0 which is the top of the ASCII-linear data
53 * including the bad-UTF-8-data block is not a multiple of UTRIE2_DATA_BLOCK_LENGTH
54 * and map[block>>UTRIE2_SHIFT_2] (used in reference counting and compaction
55 * remapping) stops working.
56 *
57 * Requires UTRIE2_SHIFT_1>=10 because utrie2_enumForLeadSurrogate()
58 * assumes that a single index-2 block is used for 0x400 code points
59 * corresponding to one lead surrogate.
60 *
61 * Requires UTRIE2_SHIFT_1<=16. Otherwise one single index-2 block contains
62 * more than one Unicode plane, and the split of the index-2 table into a BMP
63 * part and a supplementary part, with a gap in between, would not work.
64 *
65 * Requires UTRIE2_INDEX_SHIFT>=1 not because of the code but because
66 * there is data with more than 64k distinct values,
67 * for example for Unihan collation with a separate collation weight per
68 * Han character.
69 */
70
71/* Building a trie ----------------------------------------------------------*/
72
73enum {
74    /** The null index-2 block, following the gap in the index-2 table. */
75    UNEWTRIE2_INDEX_2_NULL_OFFSET=UNEWTRIE2_INDEX_GAP_OFFSET+UNEWTRIE2_INDEX_GAP_LENGTH,
76
77    /** The start of allocated index-2 blocks. */
78    UNEWTRIE2_INDEX_2_START_OFFSET=UNEWTRIE2_INDEX_2_NULL_OFFSET+UTRIE2_INDEX_2_BLOCK_LENGTH,
79
80    /**
81     * The null data block.
82     * Length 64=0x40 even if UTRIE2_DATA_BLOCK_LENGTH is smaller,
83     * to work with 6-bit trail bytes from 2-byte UTF-8.
84     */
85    UNEWTRIE2_DATA_NULL_OFFSET=UTRIE2_DATA_START_OFFSET,
86
87    /** The start of allocated data blocks. */
88    UNEWTRIE2_DATA_START_OFFSET=UNEWTRIE2_DATA_NULL_OFFSET+0x40,
89
90    /**
91     * The start of data blocks for U+0800 and above.
92     * Below, compaction uses a block length of 64 for 2-byte UTF-8.
93     * From here on, compaction uses UTRIE2_DATA_BLOCK_LENGTH.
94     * Data values for 0x780 code points beyond ASCII.
95     */
96    UNEWTRIE2_DATA_0800_OFFSET=UNEWTRIE2_DATA_START_OFFSET+0x780
97};
98
99/* Start with allocation of 16k data entries. */
100#define UNEWTRIE2_INITIAL_DATA_LENGTH ((int32_t)1<<14)
101
102/* Grow about 8x each time. */
103#define UNEWTRIE2_MEDIUM_DATA_LENGTH ((int32_t)1<<17)
104
105static int32_t
106allocIndex2Block(UNewTrie2 *trie);
107
108U_CAPI UTrie2 * U_EXPORT2
109utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) {
110    UTrie2 *trie;
111    UNewTrie2 *newTrie;
112    uint32_t *data;
113    int32_t i, j;
114
115    if(U_FAILURE(*pErrorCode)) {
116        return NULL;
117    }
118
119    trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
120    newTrie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2));
121    data=(uint32_t *)uprv_malloc(UNEWTRIE2_INITIAL_DATA_LENGTH*4);
122    if(trie==NULL || newTrie==NULL || data==NULL) {
123        uprv_free(trie);
124        uprv_free(newTrie);
125        uprv_free(data);
126        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
127        return 0;
128    }
129
130    uprv_memset(trie, 0, sizeof(UTrie2));
131    trie->initialValue=initialValue;
132    trie->errorValue=errorValue;
133    trie->highStart=0x110000;
134    trie->newTrie=newTrie;
135
136    newTrie->data=data;
137    newTrie->dataCapacity=UNEWTRIE2_INITIAL_DATA_LENGTH;
138    newTrie->initialValue=initialValue;
139    newTrie->errorValue=errorValue;
140    newTrie->highStart=0x110000;
141    newTrie->firstFreeBlock=0;  /* no free block in the list */
142    newTrie->isCompacted=FALSE;
143
144    /*
145     * preallocate and reset
146     * - ASCII
147     * - the bad-UTF-8-data block
148     * - the null data block
149     */
150    for(i=0; i<0x80; ++i) {
151        newTrie->data[i]=initialValue;
152    }
153    for(; i<0xc0; ++i) {
154        newTrie->data[i]=errorValue;
155    }
156    for(i=UNEWTRIE2_DATA_NULL_OFFSET; i<UNEWTRIE2_DATA_START_OFFSET; ++i) {
157        newTrie->data[i]=initialValue;
158    }
159    newTrie->dataNullOffset=UNEWTRIE2_DATA_NULL_OFFSET;
160    newTrie->dataLength=UNEWTRIE2_DATA_START_OFFSET;
161
162    /* set the index-2 indexes for the 2=0x80>>UTRIE2_SHIFT_2 ASCII data blocks */
163    for(i=0, j=0; j<0x80; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
164        newTrie->index2[i]=j;
165        newTrie->map[i]=1;
166    }
167    /* reference counts for the bad-UTF-8-data block */
168    for(; j<0xc0; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
169        newTrie->map[i]=0;
170    }
171    /*
172     * Reference counts for the null data block: all blocks except for the ASCII blocks.
173     * Plus 1 so that we don't drop this block during compaction.
174     * Plus as many as needed for lead surrogate code points.
175     */
176    /* i==newTrie->dataNullOffset */
177    newTrie->map[i++]=
178        (0x110000>>UTRIE2_SHIFT_2)-
179        (0x80>>UTRIE2_SHIFT_2)+
180        1+
181        UTRIE2_LSCP_INDEX_2_LENGTH;
182    j+=UTRIE2_DATA_BLOCK_LENGTH;
183    for(; j<UNEWTRIE2_DATA_START_OFFSET; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
184        newTrie->map[i]=0;
185    }
186
187    /*
188     * set the remaining indexes in the BMP index-2 block
189     * to the null data block
190     */
191    for(i=0x80>>UTRIE2_SHIFT_2; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
192        newTrie->index2[i]=UNEWTRIE2_DATA_NULL_OFFSET;
193    }
194
195    /*
196     * Fill the index gap with impossible values so that compaction
197     * does not overlap other index-2 blocks with the gap.
198     */
199    for(i=0; i<UNEWTRIE2_INDEX_GAP_LENGTH; ++i) {
200        newTrie->index2[UNEWTRIE2_INDEX_GAP_OFFSET+i]=-1;
201    }
202
203    /* set the indexes in the null index-2 block */
204    for(i=0; i<UTRIE2_INDEX_2_BLOCK_LENGTH; ++i) {
205        newTrie->index2[UNEWTRIE2_INDEX_2_NULL_OFFSET+i]=UNEWTRIE2_DATA_NULL_OFFSET;
206    }
207    newTrie->index2NullOffset=UNEWTRIE2_INDEX_2_NULL_OFFSET;
208    newTrie->index2Length=UNEWTRIE2_INDEX_2_START_OFFSET;
209
210    /* set the index-1 indexes for the linear index-2 block */
211    for(i=0, j=0;
212        i<UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
213        ++i, j+=UTRIE2_INDEX_2_BLOCK_LENGTH
214    ) {
215        newTrie->index1[i]=j;
216    }
217
218    /* set the remaining index-1 indexes to the null index-2 block */
219    for(; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
220        newTrie->index1[i]=UNEWTRIE2_INDEX_2_NULL_OFFSET;
221    }
222
223    /*
224     * Preallocate and reset data for U+0080..U+07ff,
225     * for 2-byte UTF-8 which will be compacted in 64-blocks
226     * even if UTRIE2_DATA_BLOCK_LENGTH is smaller.
227     */
228    for(i=0x80; i<0x800; i+=UTRIE2_DATA_BLOCK_LENGTH) {
229        utrie2_set32(trie, i, initialValue, pErrorCode);
230    }
231
232    return trie;
233}
234
235static UNewTrie2 *
236cloneBuilder(const UNewTrie2 *other) {
237    UNewTrie2 *trie;
238
239    trie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2));
240    if(trie==NULL) {
241        return NULL;
242    }
243
244    trie->data=(uint32_t *)uprv_malloc(other->dataCapacity*4);
245    if(trie->data==NULL) {
246        uprv_free(trie);
247        return NULL;
248    }
249    trie->dataCapacity=other->dataCapacity;
250
251    /* clone data */
252    uprv_memcpy(trie->index1, other->index1, sizeof(trie->index1));
253    uprv_memcpy(trie->index2, other->index2, other->index2Length*4);
254    trie->index2NullOffset=other->index2NullOffset;
255    trie->index2Length=other->index2Length;
256
257    uprv_memcpy(trie->data, other->data, other->dataLength*4);
258    trie->dataNullOffset=other->dataNullOffset;
259    trie->dataLength=other->dataLength;
260
261    /* reference counters */
262    if(other->isCompacted) {
263        trie->firstFreeBlock=0;
264    } else {
265        uprv_memcpy(trie->map, other->map, (other->dataLength>>UTRIE2_SHIFT_2)*4);
266        trie->firstFreeBlock=other->firstFreeBlock;
267    }
268
269    trie->initialValue=other->initialValue;
270    trie->errorValue=other->errorValue;
271    trie->highStart=other->highStart;
272    trie->isCompacted=other->isCompacted;
273
274    return trie;
275}
276
277U_CAPI UTrie2 * U_EXPORT2
278utrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode) {
279    UTrie2 *trie;
280
281    if(U_FAILURE(*pErrorCode)) {
282        return NULL;
283    }
284    if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) {
285        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
286        return NULL;
287    }
288
289    trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
290    if(trie==NULL) {
291        return NULL;
292    }
293    uprv_memcpy(trie, other, sizeof(UTrie2));
294
295    if(other->memory!=NULL) {
296        trie->memory=uprv_malloc(other->length);
297        if(trie->memory!=NULL) {
298            trie->isMemoryOwned=TRUE;
299            uprv_memcpy(trie->memory, other->memory, other->length);
300
301            /* make the clone's pointers point to its own memory */
302            trie->index=(uint16_t *)trie->memory+(other->index-(uint16_t *)other->memory);
303            if(other->data16!=NULL) {
304                trie->data16=(uint16_t *)trie->memory+(other->data16-(uint16_t *)other->memory);
305            }
306            if(other->data32!=NULL) {
307                trie->data32=(uint32_t *)trie->memory+(other->data32-(uint32_t *)other->memory);
308            }
309        }
310    } else /* other->newTrie!=NULL */ {
311        trie->newTrie=cloneBuilder(other->newTrie);
312    }
313
314    if(trie->memory==NULL && trie->newTrie==NULL) {
315        uprv_free(trie);
316        trie=NULL;
317    }
318    return trie;
319}
320
321typedef struct NewTrieAndStatus {
322    UTrie2 *trie;
323    UErrorCode errorCode;
324    UBool exclusiveLimit;  /* rather than inclusive range end */
325} NewTrieAndStatus;
326
327static UBool U_CALLCONV
328copyEnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
329    NewTrieAndStatus *nt=(NewTrieAndStatus *)context;
330    if(value!=nt->trie->initialValue) {
331        if(nt->exclusiveLimit) {
332            --end;
333        }
334        if(start==end) {
335            utrie2_set32(nt->trie, start, value, &nt->errorCode);
336        } else {
337            utrie2_setRange32(nt->trie, start, end, value, TRUE, &nt->errorCode);
338        }
339        return U_SUCCESS(nt->errorCode);
340    } else {
341        return TRUE;
342    }
343}
344
345#ifdef UTRIE2_DEBUG
346static void
347utrie_printLengths(const UTrie *trie) {
348    long indexLength=trie->indexLength;
349    long dataLength=(long)trie->dataLength;
350    long totalLength=(long)sizeof(UTrieHeader)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2);
351    printf("**UTrieLengths** index:%6ld  data:%6ld  serialized:%6ld\n",
352           indexLength, dataLength, totalLength);
353}
354
355static void
356utrie2_printLengths(const UTrie2 *trie, const char *which) {
357    long indexLength=trie->indexLength;
358    long dataLength=(long)trie->dataLength;
359    long totalLength=(long)sizeof(UTrie2Header)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2);
360    printf("**UTrie2Lengths(%s)** index:%6ld  data:%6ld  serialized:%6ld\n",
361           which, indexLength, dataLength, totalLength);
362}
363#endif
364
365U_CAPI UTrie2 * U_EXPORT2
366utrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode) {
367    NewTrieAndStatus context;
368    UChar lead;
369
370    if(U_FAILURE(*pErrorCode)) {
371        return NULL;
372    }
373    if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) {
374        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
375        return NULL;
376    }
377    if(other->newTrie!=NULL && !other->newTrie->isCompacted) {
378        return utrie2_clone(other, pErrorCode);  /* clone an unfrozen trie */
379    }
380
381    /* Clone the frozen trie by enumerating it and building a new one. */
382    context.trie=utrie2_open(other->initialValue, other->errorValue, pErrorCode);
383    if(U_FAILURE(*pErrorCode)) {
384        return NULL;
385    }
386    context.exclusiveLimit=FALSE;
387    context.errorCode=*pErrorCode;
388    utrie2_enum(other, NULL, copyEnumRange, &context);
389    *pErrorCode=context.errorCode;
390    for(lead=0xd800; lead<0xdc00; ++lead) {
391        uint32_t value;
392        if(other->data32==NULL) {
393            value=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(other, lead);
394        } else {
395            value=UTRIE2_GET32_FROM_U16_SINGLE_LEAD(other, lead);
396        }
397        if(value!=other->initialValue) {
398            utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
399        }
400    }
401    if(U_FAILURE(*pErrorCode)) {
402        utrie2_close(context.trie);
403        context.trie=NULL;
404    }
405    return context.trie;
406}
407
408/* Almost the same as utrie2_cloneAsThawed() but copies a UTrie and freezes the clone. */
409U_CAPI UTrie2 * U_EXPORT2
410utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode) {
411    NewTrieAndStatus context;
412    UChar lead;
413
414    if(U_FAILURE(*pErrorCode)) {
415        return NULL;
416    }
417    if(trie1==NULL) {
418        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
419        return NULL;
420    }
421    context.trie=utrie2_open(trie1->initialValue, errorValue, pErrorCode);
422    if(U_FAILURE(*pErrorCode)) {
423        return NULL;
424    }
425    context.exclusiveLimit=TRUE;
426    context.errorCode=*pErrorCode;
427    utrie_enum(trie1, NULL, copyEnumRange, &context);
428    *pErrorCode=context.errorCode;
429    for(lead=0xd800; lead<0xdc00; ++lead) {
430        uint32_t value;
431        if(trie1->data32==NULL) {
432            value=UTRIE_GET16_FROM_LEAD(trie1, lead);
433        } else {
434            value=UTRIE_GET32_FROM_LEAD(trie1, lead);
435        }
436        if(value!=trie1->initialValue) {
437            utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
438        }
439    }
440    if(U_SUCCESS(*pErrorCode)) {
441        utrie2_freeze(context.trie,
442                      trie1->data32!=NULL ? UTRIE2_32_VALUE_BITS : UTRIE2_16_VALUE_BITS,
443                      pErrorCode);
444    }
445#ifdef UTRIE2_DEBUG
446    if(U_SUCCESS(*pErrorCode)) {
447        utrie_printLengths(trie1);
448        utrie2_printLengths(context.trie, "fromUTrie");
449    }
450#endif
451    if(U_FAILURE(*pErrorCode)) {
452        utrie2_close(context.trie);
453        context.trie=NULL;
454    }
455    return context.trie;
456}
457
458static inline UBool
459isInNullBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
460    int32_t i2, block;
461
462    if(U_IS_LEAD(c) && forLSCP) {
463        i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+
464            (c>>UTRIE2_SHIFT_2);
465    } else {
466        i2=trie->index1[c>>UTRIE2_SHIFT_1]+
467            ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);
468    }
469    block=trie->index2[i2];
470    return (UBool)(block==trie->dataNullOffset);
471}
472
473static int32_t
474allocIndex2Block(UNewTrie2 *trie) {
475    int32_t newBlock, newTop;
476
477    newBlock=trie->index2Length;
478    newTop=newBlock+UTRIE2_INDEX_2_BLOCK_LENGTH;
479    if(newTop>LENGTHOF(trie->index2)) {
480        /*
481         * Should never occur.
482         * Either UTRIE2_MAX_BUILD_TIME_INDEX_LENGTH is incorrect,
483         * or the code writes more values than should be possible.
484         */
485        return -1;
486    }
487    trie->index2Length=newTop;
488    uprv_memcpy(trie->index2+newBlock, trie->index2+trie->index2NullOffset, UTRIE2_INDEX_2_BLOCK_LENGTH*4);
489    return newBlock;
490}
491
492static int32_t
493getIndex2Block(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
494    int32_t i1, i2;
495
496    if(U_IS_LEAD(c) && forLSCP) {
497        return UTRIE2_LSCP_INDEX_2_OFFSET;
498    }
499
500    i1=c>>UTRIE2_SHIFT_1;
501    i2=trie->index1[i1];
502    if(i2==trie->index2NullOffset) {
503        i2=allocIndex2Block(trie);
504        if(i2<0) {
505            return -1;  /* program error */
506        }
507        trie->index1[i1]=i2;
508    }
509    return i2;
510}
511
512static int32_t
513allocDataBlock(UNewTrie2 *trie, int32_t copyBlock) {
514    int32_t newBlock, newTop;
515
516    if(trie->firstFreeBlock!=0) {
517        /* get the first free block */
518        newBlock=trie->firstFreeBlock;
519        trie->firstFreeBlock=-trie->map[newBlock>>UTRIE2_SHIFT_2];
520    } else {
521        /* get a new block from the high end */
522        newBlock=trie->dataLength;
523        newTop=newBlock+UTRIE2_DATA_BLOCK_LENGTH;
524        if(newTop>trie->dataCapacity) {
525            /* out of memory in the data array */
526            int32_t capacity;
527            uint32_t *data;
528
529            if(trie->dataCapacity<UNEWTRIE2_MEDIUM_DATA_LENGTH) {
530                capacity=UNEWTRIE2_MEDIUM_DATA_LENGTH;
531            } else if(trie->dataCapacity<UNEWTRIE2_MAX_DATA_LENGTH) {
532                capacity=UNEWTRIE2_MAX_DATA_LENGTH;
533            } else {
534                /*
535                 * Should never occur.
536                 * Either UNEWTRIE2_MAX_DATA_LENGTH is incorrect,
537                 * or the code writes more values than should be possible.
538                 */
539                return -1;
540            }
541            data=(uint32_t *)uprv_malloc(capacity*4);
542            if(data==NULL) {
543                return -1;
544            }
545            uprv_memcpy(data, trie->data, trie->dataLength*4);
546            uprv_free(trie->data);
547            trie->data=data;
548            trie->dataCapacity=capacity;
549        }
550        trie->dataLength=newTop;
551    }
552    uprv_memcpy(trie->data+newBlock, trie->data+copyBlock, UTRIE2_DATA_BLOCK_LENGTH*4);
553    trie->map[newBlock>>UTRIE2_SHIFT_2]=0;
554    return newBlock;
555}
556
557/* call when the block's reference counter reaches 0 */
558static void
559releaseDataBlock(UNewTrie2 *trie, int32_t block) {
560    /* put this block at the front of the free-block chain */
561    trie->map[block>>UTRIE2_SHIFT_2]=-trie->firstFreeBlock;
562    trie->firstFreeBlock=block;
563}
564
565static inline UBool
566isWritableBlock(UNewTrie2 *trie, int32_t block) {
567    return (UBool)(block!=trie->dataNullOffset && 1==trie->map[block>>UTRIE2_SHIFT_2]);
568}
569
570static inline void
571setIndex2Entry(UNewTrie2 *trie, int32_t i2, int32_t block) {
572    int32_t oldBlock;
573    ++trie->map[block>>UTRIE2_SHIFT_2];  /* increment first, in case block==oldBlock! */
574    oldBlock=trie->index2[i2];
575    if(0 == --trie->map[oldBlock>>UTRIE2_SHIFT_2]) {
576        releaseDataBlock(trie, oldBlock);
577    }
578    trie->index2[i2]=block;
579}
580
581/**
582 * No error checking for illegal arguments.
583 *
584 * @return -1 if no new data block available (out of memory in data array)
585 * @internal
586 */
587static int32_t
588getDataBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
589    int32_t i2, oldBlock, newBlock;
590
591    i2=getIndex2Block(trie, c, forLSCP);
592    if(i2<0) {
593        return -1;  /* program error */
594    }
595
596    i2+=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
597    oldBlock=trie->index2[i2];
598    if(isWritableBlock(trie, oldBlock)) {
599        return oldBlock;
600    }
601
602    /* allocate a new data block */
603    newBlock=allocDataBlock(trie, oldBlock);
604    if(newBlock<0) {
605        /* out of memory in the data array */
606        return -1;
607    }
608    setIndex2Entry(trie, i2, newBlock);
609    return newBlock;
610}
611
612/**
613 * @return TRUE if the value was successfully set
614 */
615static void
616set32(UNewTrie2 *trie,
617      UChar32 c, UBool forLSCP, uint32_t value,
618      UErrorCode *pErrorCode) {
619    int32_t block;
620
621    if(trie==NULL || trie->isCompacted) {
622        *pErrorCode=U_NO_WRITE_PERMISSION;
623        return;
624    }
625
626    block=getDataBlock(trie, c, forLSCP);
627    if(block<0) {
628        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
629        return;
630    }
631
632    trie->data[block+(c&UTRIE2_DATA_MASK)]=value;
633}
634
635U_CAPI void U_EXPORT2
636utrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) {
637    if(U_FAILURE(*pErrorCode)) {
638        return;
639    }
640    if((uint32_t)c>0x10ffff) {
641        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
642        return;
643    }
644    set32(trie->newTrie, c, TRUE, value, pErrorCode);
645}
646
647U_CAPI void U_EXPORT2
648utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie,
649                                     UChar32 c, uint32_t value,
650                                     UErrorCode *pErrorCode) {
651    if(U_FAILURE(*pErrorCode)) {
652        return;
653    }
654    if(!U_IS_LEAD(c)) {
655        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
656        return;
657    }
658    set32(trie->newTrie, c, FALSE, value, pErrorCode);
659}
660
661static void
662writeBlock(uint32_t *block, uint32_t value) {
663    uint32_t *limit=block+UTRIE2_DATA_BLOCK_LENGTH;
664    while(block<limit) {
665        *block++=value;
666    }
667}
668
669/**
670 * initialValue is ignored if overwrite=TRUE
671 * @internal
672 */
673static void
674fillBlock(uint32_t *block, UChar32 start, UChar32 limit,
675          uint32_t value, uint32_t initialValue, UBool overwrite) {
676    uint32_t *pLimit;
677
678    pLimit=block+limit;
679    block+=start;
680    if(overwrite) {
681        while(block<pLimit) {
682            *block++=value;
683        }
684    } else {
685        while(block<pLimit) {
686            if(*block==initialValue) {
687                *block=value;
688            }
689            ++block;
690        }
691    }
692}
693
694U_CAPI void U_EXPORT2
695utrie2_setRange32(UTrie2 *trie,
696                  UChar32 start, UChar32 end,
697                  uint32_t value, UBool overwrite,
698                  UErrorCode *pErrorCode) {
699    /*
700     * repeat value in [start..end]
701     * mark index values for repeat-data blocks by setting bit 31 of the index values
702     * fill around existing values if any, if(overwrite)
703     */
704    UNewTrie2 *newTrie;
705    int32_t block, rest, repeatBlock;
706    UChar32 limit;
707
708    if(U_FAILURE(*pErrorCode)) {
709        return;
710    }
711    if((uint32_t)start>0x10ffff || (uint32_t)end>0x10ffff || start>end) {
712        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
713        return;
714    }
715    newTrie=trie->newTrie;
716    if(newTrie==NULL || newTrie->isCompacted) {
717        *pErrorCode=U_NO_WRITE_PERMISSION;
718        return;
719    }
720    if(!overwrite && value==newTrie->initialValue) {
721        return; /* nothing to do */
722    }
723
724    limit=end+1;
725    if(start&UTRIE2_DATA_MASK) {
726        UChar32 nextStart;
727
728        /* set partial block at [start..following block boundary[ */
729        block=getDataBlock(newTrie, start, TRUE);
730        if(block<0) {
731            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
732            return;
733        }
734
735        nextStart=(start+UTRIE2_DATA_BLOCK_LENGTH)&~UTRIE2_DATA_MASK;
736        if(nextStart<=limit) {
737            fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, UTRIE2_DATA_BLOCK_LENGTH,
738                      value, newTrie->initialValue, overwrite);
739            start=nextStart;
740        } else {
741            fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, limit&UTRIE2_DATA_MASK,
742                      value, newTrie->initialValue, overwrite);
743            return;
744        }
745    }
746
747    /* number of positions in the last, partial block */
748    rest=limit&UTRIE2_DATA_MASK;
749
750    /* round down limit to a block boundary */
751    limit&=~UTRIE2_DATA_MASK;
752
753    /* iterate over all-value blocks */
754    if(value==newTrie->initialValue) {
755        repeatBlock=newTrie->dataNullOffset;
756    } else {
757        repeatBlock=-1;
758    }
759
760    while(start<limit) {
761        int32_t i2;
762        UBool setRepeatBlock=FALSE;
763
764        if(value==newTrie->initialValue && isInNullBlock(newTrie, start, TRUE)) {
765            start+=UTRIE2_DATA_BLOCK_LENGTH; /* nothing to do */
766            continue;
767        }
768
769        /* get index value */
770        i2=getIndex2Block(newTrie, start, TRUE);
771        if(i2<0) {
772            *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
773            return;
774        }
775        i2+=(start>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
776        block=newTrie->index2[i2];
777        if(isWritableBlock(newTrie, block)) {
778            /* already allocated */
779            if(overwrite && block>=UNEWTRIE2_DATA_0800_OFFSET) {
780                /*
781                 * We overwrite all values, and it's not a
782                 * protected (ASCII-linear or 2-byte UTF-8) block:
783                 * replace with the repeatBlock.
784                 */
785                setRepeatBlock=TRUE;
786            } else {
787                /* !overwrite, or protected block: just write the values into this block */
788                fillBlock(newTrie->data+block,
789                          0, UTRIE2_DATA_BLOCK_LENGTH,
790                          value, newTrie->initialValue, overwrite);
791            }
792        } else if(newTrie->data[block]!=value && (overwrite || block==newTrie->dataNullOffset)) {
793            /*
794             * Set the repeatBlock instead of the null block or previous repeat block:
795             *
796             * If !isWritableBlock() then all entries in the block have the same value
797             * because it's the null block or a range block (the repeatBlock from a previous
798             * call to utrie2_setRange32()).
799             * No other blocks are used multiple times before compacting.
800             *
801             * The null block is the only non-writable block with the initialValue because
802             * of the repeatBlock initialization above. (If value==initialValue, then
803             * the repeatBlock will be the null data block.)
804             *
805             * We set our repeatBlock if the desired value differs from the block's value,
806             * and if we overwrite any data or if the data is all initial values
807             * (which is the same as the block being the null block, see above).
808             */
809            setRepeatBlock=TRUE;
810        }
811        if(setRepeatBlock) {
812            if(repeatBlock>=0) {
813                setIndex2Entry(newTrie, i2, repeatBlock);
814            } else {
815                /* create and set and fill the repeatBlock */
816                repeatBlock=getDataBlock(newTrie, start, TRUE);
817                if(repeatBlock<0) {
818                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
819                    return;
820                }
821                writeBlock(newTrie->data+repeatBlock, value);
822            }
823        }
824
825        start+=UTRIE2_DATA_BLOCK_LENGTH;
826    }
827
828    if(rest>0) {
829        /* set partial block at [last block boundary..limit[ */
830        block=getDataBlock(newTrie, start, TRUE);
831        if(block<0) {
832            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
833            return;
834        }
835
836        fillBlock(newTrie->data+block, 0, rest, value, newTrie->initialValue, overwrite);
837    }
838
839    return;
840}
841
842/* compaction --------------------------------------------------------------- */
843
844static inline UBool
845equal_int32(const int32_t *s, const int32_t *t, int32_t length) {
846    while(length>0 && *s==*t) {
847        ++s;
848        ++t;
849        --length;
850    }
851    return (UBool)(length==0);
852}
853
854static inline UBool
855equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) {
856    while(length>0 && *s==*t) {
857        ++s;
858        ++t;
859        --length;
860    }
861    return (UBool)(length==0);
862}
863
864static int32_t
865findSameIndex2Block(const int32_t *idx, int32_t index2Length, int32_t otherBlock) {
866    int32_t block;
867
868    /* ensure that we do not even partially get past index2Length */
869    index2Length-=UTRIE2_INDEX_2_BLOCK_LENGTH;
870
871    for(block=0; block<=index2Length; ++block) {
872        if(equal_int32(idx+block, idx+otherBlock, UTRIE2_INDEX_2_BLOCK_LENGTH)) {
873            return block;
874        }
875    }
876    return -1;
877}
878
879static int32_t
880findSameDataBlock(const uint32_t *data, int32_t dataLength, int32_t otherBlock, int32_t blockLength) {
881    int32_t block;
882
883    /* ensure that we do not even partially get past dataLength */
884    dataLength-=blockLength;
885
886    for(block=0; block<=dataLength; block+=UTRIE2_DATA_GRANULARITY) {
887        if(equal_uint32(data+block, data+otherBlock, blockLength)) {
888            return block;
889        }
890    }
891    return -1;
892}
893
894/*
895 * Find the start of the last range in the trie by enumerating backward.
896 * Indexes for supplementary code points higher than this will be omitted.
897 */
898static UChar32
899findHighStart(UNewTrie2 *trie, uint32_t highValue) {
900    const uint32_t *data32;
901
902    uint32_t value, initialValue;
903    UChar32 c, prev;
904    int32_t i1, i2, j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock;
905
906    data32=trie->data;
907    initialValue=trie->initialValue;
908
909    index2NullOffset=trie->index2NullOffset;
910    nullBlock=trie->dataNullOffset;
911
912    /* set variables for previous range */
913    if(highValue==initialValue) {
914        prevI2Block=index2NullOffset;
915        prevBlock=nullBlock;
916    } else {
917        prevI2Block=-1;
918        prevBlock=-1;
919    }
920    prev=0x110000;
921
922    /* enumerate index-2 blocks */
923    i1=UNEWTRIE2_INDEX_1_LENGTH;
924    c=prev;
925    while(c>0) {
926        i2Block=trie->index1[--i1];
927        if(i2Block==prevI2Block) {
928            /* the index-2 block is the same as the previous one, and filled with highValue */
929            c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
930            continue;
931        }
932        prevI2Block=i2Block;
933        if(i2Block==index2NullOffset) {
934            /* this is the null index-2 block */
935            if(highValue!=initialValue) {
936                return c;
937            }
938            c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
939        } else {
940            /* enumerate data blocks for one index-2 block */
941            for(i2=UTRIE2_INDEX_2_BLOCK_LENGTH; i2>0;) {
942                block=trie->index2[i2Block+ --i2];
943                if(block==prevBlock) {
944                    /* the block is the same as the previous one, and filled with highValue */
945                    c-=UTRIE2_DATA_BLOCK_LENGTH;
946                    continue;
947                }
948                prevBlock=block;
949                if(block==nullBlock) {
950                    /* this is the null data block */
951                    if(highValue!=initialValue) {
952                        return c;
953                    }
954                    c-=UTRIE2_DATA_BLOCK_LENGTH;
955                } else {
956                    for(j=UTRIE2_DATA_BLOCK_LENGTH; j>0;) {
957                        value=data32[block+ --j];
958                        if(value!=highValue) {
959                            return c;
960                        }
961                        --c;
962                    }
963                }
964            }
965        }
966    }
967
968    /* deliver last range */
969    return 0;
970}
971
972/*
973 * Compact a build-time trie.
974 *
975 * The compaction
976 * - removes blocks that are identical with earlier ones
977 * - overlaps adjacent blocks as much as possible (if overlap==TRUE)
978 * - moves blocks in steps of the data granularity
979 * - moves and overlaps blocks that overlap with multiple values in the overlap region
980 *
981 * It does not
982 * - try to move and overlap blocks that are not already adjacent
983 */
984static void
985compactData(UNewTrie2 *trie) {
986    int32_t start, newStart, movedStart;
987    int32_t blockLength, overlap;
988    int32_t i, mapIndex, blockCount;
989
990    /* do not compact linear-ASCII data */
991    newStart=UTRIE2_DATA_START_OFFSET;
992    for(start=0, i=0; start<newStart; start+=UTRIE2_DATA_BLOCK_LENGTH, ++i) {
993        trie->map[i]=start;
994    }
995
996    /*
997     * Start with a block length of 64 for 2-byte UTF-8,
998     * then switch to UTRIE2_DATA_BLOCK_LENGTH.
999     */
1000    blockLength=64;
1001    blockCount=blockLength>>UTRIE2_SHIFT_2;
1002    for(start=newStart; start<trie->dataLength;) {
1003        /*
1004         * start: index of first entry of current block
1005         * newStart: index where the current block is to be moved
1006         *           (right after current end of already-compacted data)
1007         */
1008        if(start==UNEWTRIE2_DATA_0800_OFFSET) {
1009            blockLength=UTRIE2_DATA_BLOCK_LENGTH;
1010            blockCount=1;
1011        }
1012
1013        /* skip blocks that are not used */
1014        if(trie->map[start>>UTRIE2_SHIFT_2]<=0) {
1015            /* advance start to the next block */
1016            start+=blockLength;
1017
1018            /* leave newStart with the previous block! */
1019            continue;
1020        }
1021
1022        /* search for an identical block */
1023        if( (movedStart=findSameDataBlock(trie->data, newStart, start, blockLength))
1024             >=0
1025        ) {
1026            /* found an identical block, set the other block's index value for the current block */
1027            for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
1028                trie->map[mapIndex++]=movedStart;
1029                movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
1030            }
1031
1032            /* advance start to the next block */
1033            start+=blockLength;
1034
1035            /* leave newStart with the previous block! */
1036            continue;
1037        }
1038
1039        /* see if the beginning of this block can be overlapped with the end of the previous block */
1040        /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
1041        for(overlap=blockLength-UTRIE2_DATA_GRANULARITY;
1042            overlap>0 && !equal_uint32(trie->data+(newStart-overlap), trie->data+start, overlap);
1043            overlap-=UTRIE2_DATA_GRANULARITY) {}
1044
1045        if(overlap>0 || newStart<start) {
1046            /* some overlap, or just move the whole block */
1047            movedStart=newStart-overlap;
1048            for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
1049                trie->map[mapIndex++]=movedStart;
1050                movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
1051            }
1052
1053            /* move the non-overlapping indexes to their new positions */
1054            start+=overlap;
1055            for(i=blockLength-overlap; i>0; --i) {
1056                trie->data[newStart++]=trie->data[start++];
1057            }
1058        } else /* no overlap && newStart==start */ {
1059            for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
1060                trie->map[mapIndex++]=start;
1061                start+=UTRIE2_DATA_BLOCK_LENGTH;
1062            }
1063            newStart=start;
1064        }
1065    }
1066
1067    /* now adjust the index-2 table */
1068    for(i=0; i<trie->index2Length; ++i) {
1069        if(i==UNEWTRIE2_INDEX_GAP_OFFSET) {
1070            /* Gap indexes are invalid (-1). Skip over the gap. */
1071            i+=UNEWTRIE2_INDEX_GAP_LENGTH;
1072        }
1073        trie->index2[i]=trie->map[trie->index2[i]>>UTRIE2_SHIFT_2];
1074    }
1075    trie->dataNullOffset=trie->map[trie->dataNullOffset>>UTRIE2_SHIFT_2];
1076
1077    /* ensure dataLength alignment */
1078    while((newStart&(UTRIE2_DATA_GRANULARITY-1))!=0) {
1079        trie->data[newStart++]=trie->initialValue;
1080    }
1081
1082#ifdef UTRIE2_DEBUG
1083    /* we saved some space */
1084    printf("compacting UTrie2: count of 32-bit data words %lu->%lu\n",
1085            (long)trie->dataLength, (long)newStart);
1086#endif
1087
1088    trie->dataLength=newStart;
1089}
1090
1091static void
1092compactIndex2(UNewTrie2 *trie) {
1093    int32_t i, start, newStart, movedStart, overlap;
1094
1095    /* do not compact linear-BMP index-2 blocks */
1096    newStart=UTRIE2_INDEX_2_BMP_LENGTH;
1097    for(start=0, i=0; start<newStart; start+=UTRIE2_INDEX_2_BLOCK_LENGTH, ++i) {
1098        trie->map[i]=start;
1099    }
1100
1101    /* Reduce the index table gap to what will be needed at runtime. */
1102    newStart+=UTRIE2_UTF8_2B_INDEX_2_LENGTH+((trie->highStart-0x10000)>>UTRIE2_SHIFT_1);
1103
1104    for(start=UNEWTRIE2_INDEX_2_NULL_OFFSET; start<trie->index2Length;) {
1105        /*
1106         * start: index of first entry of current block
1107         * newStart: index where the current block is to be moved
1108         *           (right after current end of already-compacted data)
1109         */
1110
1111        /* search for an identical block */
1112        if( (movedStart=findSameIndex2Block(trie->index2, newStart, start))
1113             >=0
1114        ) {
1115            /* found an identical block, set the other block's index value for the current block */
1116            trie->map[start>>UTRIE2_SHIFT_1_2]=movedStart;
1117
1118            /* advance start to the next block */
1119            start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
1120
1121            /* leave newStart with the previous block! */
1122            continue;
1123        }
1124
1125        /* see if the beginning of this block can be overlapped with the end of the previous block */
1126        /* look for maximum overlap with the previous, adjacent block */
1127        for(overlap=UTRIE2_INDEX_2_BLOCK_LENGTH-1;
1128            overlap>0 && !equal_int32(trie->index2+(newStart-overlap), trie->index2+start, overlap);
1129            --overlap) {}
1130
1131        if(overlap>0 || newStart<start) {
1132            /* some overlap, or just move the whole block */
1133            trie->map[start>>UTRIE2_SHIFT_1_2]=newStart-overlap;
1134
1135            /* move the non-overlapping indexes to their new positions */
1136            start+=overlap;
1137            for(i=UTRIE2_INDEX_2_BLOCK_LENGTH-overlap; i>0; --i) {
1138                trie->index2[newStart++]=trie->index2[start++];
1139            }
1140        } else /* no overlap && newStart==start */ {
1141            trie->map[start>>UTRIE2_SHIFT_1_2]=start;
1142            start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
1143            newStart=start;
1144        }
1145    }
1146
1147    /* now adjust the index-1 table */
1148    for(i=0; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
1149        trie->index1[i]=trie->map[trie->index1[i]>>UTRIE2_SHIFT_1_2];
1150    }
1151    trie->index2NullOffset=trie->map[trie->index2NullOffset>>UTRIE2_SHIFT_1_2];
1152
1153    /*
1154     * Ensure data table alignment:
1155     * Needs to be granularity-aligned for 16-bit trie
1156     * (so that dataMove will be down-shiftable),
1157     * and 2-aligned for uint32_t data.
1158     */
1159    while((newStart&((UTRIE2_DATA_GRANULARITY-1)|1))!=0) {
1160        /* Arbitrary value: 0x3fffc not possible for real data. */
1161        trie->index2[newStart++]=(int32_t)0xffff<<UTRIE2_INDEX_SHIFT;
1162    }
1163
1164#ifdef UTRIE2_DEBUG
1165    /* we saved some space */
1166    printf("compacting UTrie2: count of 16-bit index-2 words %lu->%lu\n",
1167            (long)trie->index2Length, (long)newStart);
1168#endif
1169
1170    trie->index2Length=newStart;
1171}
1172
1173static void
1174compactTrie(UTrie2 *trie, UErrorCode *pErrorCode) {
1175    UNewTrie2 *newTrie;
1176    UChar32 highStart, suppHighStart;
1177    uint32_t highValue;
1178
1179    newTrie=trie->newTrie;
1180
1181    /* find highStart and round it up */
1182    highValue=utrie2_get32(trie, 0x10ffff);
1183    highStart=findHighStart(newTrie, highValue);
1184    highStart=(highStart+(UTRIE2_CP_PER_INDEX_1_ENTRY-1))&~(UTRIE2_CP_PER_INDEX_1_ENTRY-1);
1185    if(highStart==0x110000) {
1186        highValue=trie->errorValue;
1187    }
1188
1189    /*
1190     * Set trie->highStart only after utrie2_get32(trie, highStart).
1191     * Otherwise utrie2_get32(trie, highStart) would try to read the highValue.
1192     */
1193    trie->highStart=newTrie->highStart=highStart;
1194
1195#ifdef UTRIE2_DEBUG
1196    printf("UTrie2: highStart U+%04lx  highValue 0x%lx  initialValue 0x%lx\n",
1197            (long)highStart, (long)highValue, (long)trie->initialValue);
1198#endif
1199
1200    if(highStart<0x110000) {
1201        /* Blank out [highStart..10ffff] to release associated data blocks. */
1202        suppHighStart= highStart<=0x10000 ? 0x10000 : highStart;
1203        utrie2_setRange32(trie, suppHighStart, 0x10ffff, trie->initialValue, TRUE, pErrorCode);
1204        if(U_FAILURE(*pErrorCode)) {
1205            return;
1206        }
1207    }
1208
1209    compactData(newTrie);
1210    if(highStart>0x10000) {
1211        compactIndex2(newTrie);
1212#ifdef UTRIE2_DEBUG
1213    } else {
1214        printf("UTrie2: highStart U+%04lx  count of 16-bit index-2 words %lu->%lu\n",
1215                (long)highStart, (long)trie->newTrie->index2Length, (long)UTRIE2_INDEX_1_OFFSET);
1216#endif
1217    }
1218
1219    /*
1220     * Store the highValue in the data array and round up the dataLength.
1221     * Must be done after compactData() because that assumes that dataLength
1222     * is a multiple of UTRIE2_DATA_BLOCK_LENGTH.
1223     */
1224    newTrie->data[newTrie->dataLength++]=highValue;
1225    while((newTrie->dataLength&(UTRIE2_DATA_GRANULARITY-1))!=0) {
1226        newTrie->data[newTrie->dataLength++]=trie->initialValue;
1227    }
1228
1229    newTrie->isCompacted=TRUE;
1230}
1231
1232/* serialization ------------------------------------------------------------ */
1233
1234/**
1235 * Maximum length of the runtime index array.
1236 * Limited by its own 16-bit index values, and by uint16_t UTrie2Header.indexLength.
1237 * (The actual maximum length is lower,
1238 * (0x110000>>UTRIE2_SHIFT_2)+UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH.)
1239 */
1240#define UTRIE2_MAX_INDEX_LENGTH 0xffff
1241
1242/**
1243 * Maximum length of the runtime data array.
1244 * Limited by 16-bit index values that are left-shifted by UTRIE2_INDEX_SHIFT,
1245 * and by uint16_t UTrie2Header.shiftedDataLength.
1246 */
1247#define UTRIE2_MAX_DATA_LENGTH (0xffff<<UTRIE2_INDEX_SHIFT)
1248
1249/* Compact and internally serialize the trie. */
1250U_CAPI void U_EXPORT2
1251utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode) {
1252    UNewTrie2 *newTrie;
1253    UTrie2Header *header;
1254    uint32_t *p;
1255    uint16_t *dest16;
1256    int32_t i, length;
1257    int32_t allIndexesLength;
1258    int32_t dataMove;  /* >0 if the data is moved to the end of the index array */
1259    UChar32 highStart;
1260
1261    /* argument check */
1262    if(U_FAILURE(*pErrorCode)) {
1263        return;
1264    }
1265    if( trie==NULL ||
1266        valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits
1267    ) {
1268        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1269        return;
1270    }
1271    newTrie=trie->newTrie;
1272    if(newTrie==NULL) {
1273        /* already frozen */
1274        UTrie2ValueBits frozenValueBits=
1275            trie->data16!=NULL ? UTRIE2_16_VALUE_BITS : UTRIE2_32_VALUE_BITS;
1276        if(valueBits!=frozenValueBits) {
1277            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1278        }
1279        return;
1280    }
1281
1282    /* compact if necessary */
1283    if(!newTrie->isCompacted) {
1284        compactTrie(trie, pErrorCode);
1285        if(U_FAILURE(*pErrorCode)) {
1286            return;
1287        }
1288    }
1289    highStart=trie->highStart;
1290
1291    if(highStart<=0x10000) {
1292        allIndexesLength=UTRIE2_INDEX_1_OFFSET;
1293    } else {
1294        allIndexesLength=newTrie->index2Length;
1295    }
1296    if(valueBits==UTRIE2_16_VALUE_BITS) {
1297        dataMove=allIndexesLength;
1298    } else {
1299        dataMove=0;
1300    }
1301
1302    /* are indexLength and dataLength within limits? */
1303    if( /* for unshifted indexLength */
1304        allIndexesLength>UTRIE2_MAX_INDEX_LENGTH ||
1305        /* for unshifted dataNullOffset */
1306        (dataMove+newTrie->dataNullOffset)>0xffff ||
1307        /* for unshifted 2-byte UTF-8 index-2 values */
1308        (dataMove+UNEWTRIE2_DATA_0800_OFFSET)>0xffff ||
1309        /* for shiftedDataLength */
1310        (dataMove+newTrie->dataLength)>UTRIE2_MAX_DATA_LENGTH
1311    ) {
1312        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1313        return;
1314    }
1315
1316    /* calculate the total serialized length */
1317    length=sizeof(UTrie2Header)+allIndexesLength*2;
1318    if(valueBits==UTRIE2_16_VALUE_BITS) {
1319        length+=newTrie->dataLength*2;
1320    } else {
1321        length+=newTrie->dataLength*4;
1322    }
1323
1324    trie->memory=uprv_malloc(length);
1325    if(trie->memory==NULL) {
1326        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1327        return;
1328    }
1329    trie->length=length;
1330    trie->isMemoryOwned=TRUE;
1331
1332    trie->indexLength=allIndexesLength;
1333    trie->dataLength=newTrie->dataLength;
1334    if(highStart<=0x10000) {
1335        trie->index2NullOffset=0xffff;
1336    } else {
1337        trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET+newTrie->index2NullOffset;
1338    }
1339    trie->dataNullOffset=(uint16_t)(dataMove+newTrie->dataNullOffset);
1340    trie->highValueIndex=dataMove+trie->dataLength-UTRIE2_DATA_GRANULARITY;
1341
1342    /* set the header fields */
1343    header=(UTrie2Header *)trie->memory;
1344
1345    header->signature=UTRIE2_SIG; /* "Tri2" */
1346    header->options=(uint16_t)valueBits;
1347
1348    header->indexLength=(uint16_t)trie->indexLength;
1349    header->shiftedDataLength=(uint16_t)(trie->dataLength>>UTRIE2_INDEX_SHIFT);
1350    header->index2NullOffset=trie->index2NullOffset;
1351    header->dataNullOffset=trie->dataNullOffset;
1352    header->shiftedHighStart=(uint16_t)(highStart>>UTRIE2_SHIFT_1);
1353
1354    /* fill the index and data arrays */
1355    dest16=(uint16_t *)(header+1);
1356    trie->index=dest16;
1357
1358    /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove */
1359    p=(uint32_t *)newTrie->index2;
1360    for(i=UTRIE2_INDEX_2_BMP_LENGTH; i>0; --i) {
1361        *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);
1362    }
1363
1364    /* write UTF-8 2-byte index-2 values, not right-shifted */
1365    for(i=0; i<(0xc2-0xc0); ++i) {                                  /* C0..C1 */
1366        *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET);
1367    }
1368    for(; i<(0xe0-0xc0); ++i) {                                     /* C2..DF */
1369        *dest16++=(uint16_t)(dataMove+newTrie->index2[i<<(6-UTRIE2_SHIFT_2)]);
1370    }
1371
1372    if(highStart>0x10000) {
1373        int32_t index1Length=(highStart-0x10000)>>UTRIE2_SHIFT_1;
1374        int32_t index2Offset=UTRIE2_INDEX_2_BMP_LENGTH+UTRIE2_UTF8_2B_INDEX_2_LENGTH+index1Length;
1375
1376        /* write 16-bit index-1 values for supplementary code points */
1377        p=(uint32_t *)newTrie->index1+UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
1378        for(i=index1Length; i>0; --i) {
1379            *dest16++=(uint16_t)(UTRIE2_INDEX_2_OFFSET + *p++);
1380        }
1381
1382        /*
1383         * write the index-2 array values for supplementary code points,
1384         * shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove
1385         */
1386        p=(uint32_t *)newTrie->index2+index2Offset;
1387        for(i=newTrie->index2Length-index2Offset; i>0; --i) {
1388            *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);
1389        }
1390    }
1391
1392    /* write the 16/32-bit data array */
1393    switch(valueBits) {
1394    case UTRIE2_16_VALUE_BITS:
1395        /* write 16-bit data values */
1396        trie->data16=dest16;
1397        trie->data32=NULL;
1398        p=newTrie->data;
1399        for(i=newTrie->dataLength; i>0; --i) {
1400            *dest16++=(uint16_t)*p++;
1401        }
1402        break;
1403    case UTRIE2_32_VALUE_BITS:
1404        /* write 32-bit data values */
1405        trie->data16=NULL;
1406        trie->data32=(uint32_t *)dest16;
1407        uprv_memcpy(dest16, newTrie->data, newTrie->dataLength*4);
1408        break;
1409    default:
1410        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1411        return;
1412    }
1413
1414    /* Delete the UNewTrie2. */
1415    uprv_free(newTrie->data);
1416    uprv_free(newTrie);
1417    trie->newTrie=NULL;
1418}
1419
1420U_CAPI UBool U_EXPORT2
1421utrie2_isFrozen(const UTrie2 *trie) {
1422    return (UBool)(trie->newTrie==NULL);
1423}
1424
1425U_CAPI int32_t U_EXPORT2
1426utrie2_serialize(UTrie2 *trie,
1427                 void *data, int32_t capacity,
1428                 UErrorCode *pErrorCode) {
1429    /* argument check */
1430    if(U_FAILURE(*pErrorCode)) {
1431        return 0;
1432    }
1433
1434    if( trie==NULL || trie->memory==NULL || trie->newTrie!=NULL ||
1435        capacity<0 || (capacity>0 && (data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)))
1436    ) {
1437        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1438        return 0;
1439    }
1440
1441    if(capacity>=trie->length) {
1442        uprv_memcpy(data, trie->memory, trie->length);
1443    } else {
1444        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1445    }
1446    return trie->length;
1447}
1448
1449/*
1450 * This is here to avoid a dependency from utrie2.cpp on utrie.c.
1451 * This file already depends on utrie.c.
1452 * Otherwise, this should be in utrie2.cpp right after utrie2_swap().
1453 */
1454U_CAPI int32_t U_EXPORT2
1455utrie2_swapAnyVersion(const UDataSwapper *ds,
1456                      const void *inData, int32_t length, void *outData,
1457                      UErrorCode *pErrorCode) {
1458    if(U_SUCCESS(*pErrorCode)) {
1459        switch(utrie2_getVersion(inData, length, TRUE)) {
1460        case 1:
1461            return utrie_swap(ds, inData, length, outData, pErrorCode);
1462        case 2:
1463            return utrie2_swap(ds, inData, length, outData, pErrorCode);
1464        default:
1465            *pErrorCode=U_INVALID_FORMAT_ERROR;
1466            return 0;
1467        }
1468    }
1469    return 0;
1470}
1471