185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/*
285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho******************************************************************************
385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho*
427f654740f2a26ad62a5c155af9199af9e69b889claireho*   Copyright (C) 2001-2010, International Business Machines
585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho*   Corporation and others.  All Rights Reserved.
685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho*
785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho******************************************************************************
885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho*   file name:  utrie2_builder.c
985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho*   encoding:   US-ASCII
1085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho*   tab size:   8 (not used)
1185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho*   indentation:4
1285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho*
1385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho*   created on: 2008sep26 (split off from utrie2.c)
1485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho*   created by: Markus W. Scherer
1585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho*
1685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho*   This is a common implementation of a Unicode trie.
1785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho*   It is a kind of compressed, serializable table of 16- or 32-bit values associated with
1885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho*   Unicode code points (0..0x10ffff).
1985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho*   This is the second common version of a Unicode trie (hence the name UTrie2).
2085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho*   See utrie2.h for a comparison.
2185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho*
2285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho*   This file contains only the builder code.
2385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho*   See utrie2.c for the runtime and enumeration code.
2485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho*/
2585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#ifdef UTRIE2_DEBUG
2685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#   include <stdio.h>
2785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#endif
2885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
2985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#include "unicode/utypes.h"
3085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#include "cmemory.h"
3185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#include "utrie2.h"
3285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#include "utrie2_impl.h"
3385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
3427f654740f2a26ad62a5c155af9199af9e69b889claireho#include "utrie.h" /* for utrie2_fromUTrie() and utrie_swap() */
3585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
3685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
3785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
3885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* Implementation notes ----------------------------------------------------- */
3985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
4085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/*
4185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * The UTRIE2_SHIFT_1, UTRIE2_SHIFT_2, UTRIE2_INDEX_SHIFT and other values
4285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * have been chosen to minimize trie sizes overall.
4385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Most of the code is flexible enough to work with a range of values,
4485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * within certain limits.
4585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *
4685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Exception: Support for separate values for lead surrogate code _units_
4785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * vs. code _points_ was added after the constants were fixed,
4885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * and has not been tested nor particularly designed for different constant values.
4985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * (Especially the utrie2_enum() code that jumps to the special LSCP index-2
5085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * part and back.)
5185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *
5285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Requires UTRIE2_SHIFT_2<=6. Otherwise 0xc0 which is the top of the ASCII-linear data
5385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * including the bad-UTF-8-data block is not a multiple of UTRIE2_DATA_BLOCK_LENGTH
5485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * and map[block>>UTRIE2_SHIFT_2] (used in reference counting and compaction
5585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * remapping) stops working.
5685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *
5785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Requires UTRIE2_SHIFT_1>=10 because utrie2_enumForLeadSurrogate()
5885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * assumes that a single index-2 block is used for 0x400 code points
5985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * corresponding to one lead surrogate.
6085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *
6185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Requires UTRIE2_SHIFT_1<=16. Otherwise one single index-2 block contains
6285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * more than one Unicode plane, and the split of the index-2 table into a BMP
6385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * part and a supplementary part, with a gap in between, would not work.
6485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *
6585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Requires UTRIE2_INDEX_SHIFT>=1 not because of the code but because
6685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * there is data with more than 64k distinct values,
6785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * for example for Unihan collation with a separate collation weight per
6885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Han character.
6985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */
7085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
7185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* Building a trie ----------------------------------------------------------*/
7285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
7385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hoenum {
7485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /** The null index-2 block, following the gap in the index-2 table. */
7585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    UNEWTRIE2_INDEX_2_NULL_OFFSET=UNEWTRIE2_INDEX_GAP_OFFSET+UNEWTRIE2_INDEX_GAP_LENGTH,
7685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
7785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /** The start of allocated index-2 blocks. */
7885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    UNEWTRIE2_INDEX_2_START_OFFSET=UNEWTRIE2_INDEX_2_NULL_OFFSET+UTRIE2_INDEX_2_BLOCK_LENGTH,
7985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
8085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /**
8185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * The null data block.
8285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * Length 64=0x40 even if UTRIE2_DATA_BLOCK_LENGTH is smaller,
8385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * to work with 6-bit trail bytes from 2-byte UTF-8.
8485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     */
8585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    UNEWTRIE2_DATA_NULL_OFFSET=UTRIE2_DATA_START_OFFSET,
8685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
8785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /** The start of allocated data blocks. */
8885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    UNEWTRIE2_DATA_START_OFFSET=UNEWTRIE2_DATA_NULL_OFFSET+0x40,
8985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
9085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /**
9185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * The start of data blocks for U+0800 and above.
9285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * Below, compaction uses a block length of 64 for 2-byte UTF-8.
9385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * From here on, compaction uses UTRIE2_DATA_BLOCK_LENGTH.
9485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * Data values for 0x780 code points beyond ASCII.
9585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     */
9685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    UNEWTRIE2_DATA_0800_OFFSET=UNEWTRIE2_DATA_START_OFFSET+0x780
9785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho};
9885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
9985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* Start with allocation of 16k data entries. */
10085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#define UNEWTRIE2_INITIAL_DATA_LENGTH ((int32_t)1<<14)
10185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
10285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* Grow about 8x each time. */
10385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#define UNEWTRIE2_MEDIUM_DATA_LENGTH ((int32_t)1<<17)
10485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
10585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic int32_t
10685bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoallocIndex2Block(UNewTrie2 *trie);
10785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
10885bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI UTrie2 * U_EXPORT2
10985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houtrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) {
11085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    UTrie2 *trie;
11185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    UNewTrie2 *newTrie;
11285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uint32_t *data;
11385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t i, j;
11485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
11585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(U_FAILURE(*pErrorCode)) {
11685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return NULL;
11785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
11885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
11985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
12085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    newTrie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2));
12185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    data=(uint32_t *)uprv_malloc(UNEWTRIE2_INITIAL_DATA_LENGTH*4);
12285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(trie==NULL || newTrie==NULL || data==NULL) {
12385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        uprv_free(trie);
12485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        uprv_free(newTrie);
12585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        uprv_free(data);
12685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
12785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return 0;
12885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
12985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
13085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uprv_memset(trie, 0, sizeof(UTrie2));
13185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->initialValue=initialValue;
13285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->errorValue=errorValue;
13385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->highStart=0x110000;
13485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->newTrie=newTrie;
13585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
13685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    newTrie->data=data;
13785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    newTrie->dataCapacity=UNEWTRIE2_INITIAL_DATA_LENGTH;
13885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    newTrie->initialValue=initialValue;
13985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    newTrie->errorValue=errorValue;
14085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    newTrie->highStart=0x110000;
14185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    newTrie->firstFreeBlock=0;  /* no free block in the list */
14285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    newTrie->isCompacted=FALSE;
14385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
14485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /*
14585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * preallocate and reset
14685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * - ASCII
14785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * - the bad-UTF-8-data block
14885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * - the null data block
14985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     */
15085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    for(i=0; i<0x80; ++i) {
15185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        newTrie->data[i]=initialValue;
15285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
15385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    for(; i<0xc0; ++i) {
15485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        newTrie->data[i]=errorValue;
15585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
15685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    for(i=UNEWTRIE2_DATA_NULL_OFFSET; i<UNEWTRIE2_DATA_START_OFFSET; ++i) {
15785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        newTrie->data[i]=initialValue;
15885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
15985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    newTrie->dataNullOffset=UNEWTRIE2_DATA_NULL_OFFSET;
16085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    newTrie->dataLength=UNEWTRIE2_DATA_START_OFFSET;
16185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
16285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* set the index-2 indexes for the 2=0x80>>UTRIE2_SHIFT_2 ASCII data blocks */
16385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    for(i=0, j=0; j<0x80; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
16485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        newTrie->index2[i]=j;
16585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        newTrie->map[i]=1;
16685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
16785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* reference counts for the bad-UTF-8-data block */
16885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    for(; j<0xc0; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
16985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        newTrie->map[i]=0;
17085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
17185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /*
17285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * Reference counts for the null data block: all blocks except for the ASCII blocks.
17385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * Plus 1 so that we don't drop this block during compaction.
17485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * Plus as many as needed for lead surrogate code points.
17585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     */
17685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* i==newTrie->dataNullOffset */
17785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    newTrie->map[i++]=
17885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        (0x110000>>UTRIE2_SHIFT_2)-
17985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        (0x80>>UTRIE2_SHIFT_2)+
18085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        1+
18185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        UTRIE2_LSCP_INDEX_2_LENGTH;
18285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    j+=UTRIE2_DATA_BLOCK_LENGTH;
18385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    for(; j<UNEWTRIE2_DATA_START_OFFSET; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
18485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        newTrie->map[i]=0;
18585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
18685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
18785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /*
18885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * set the remaining indexes in the BMP index-2 block
18985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * to the null data block
19085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     */
19185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    for(i=0x80>>UTRIE2_SHIFT_2; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
19285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        newTrie->index2[i]=UNEWTRIE2_DATA_NULL_OFFSET;
19385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
19485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
19585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /*
19685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * Fill the index gap with impossible values so that compaction
19785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * does not overlap other index-2 blocks with the gap.
19885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     */
19985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    for(i=0; i<UNEWTRIE2_INDEX_GAP_LENGTH; ++i) {
20085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        newTrie->index2[UNEWTRIE2_INDEX_GAP_OFFSET+i]=-1;
20185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
20285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
20385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* set the indexes in the null index-2 block */
20485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    for(i=0; i<UTRIE2_INDEX_2_BLOCK_LENGTH; ++i) {
20585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        newTrie->index2[UNEWTRIE2_INDEX_2_NULL_OFFSET+i]=UNEWTRIE2_DATA_NULL_OFFSET;
20685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
20785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    newTrie->index2NullOffset=UNEWTRIE2_INDEX_2_NULL_OFFSET;
20885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    newTrie->index2Length=UNEWTRIE2_INDEX_2_START_OFFSET;
20985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
21085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* set the index-1 indexes for the linear index-2 block */
21185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    for(i=0, j=0;
21285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        i<UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
21385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        ++i, j+=UTRIE2_INDEX_2_BLOCK_LENGTH
21485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    ) {
21585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        newTrie->index1[i]=j;
21685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
21785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
21885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* set the remaining index-1 indexes to the null index-2 block */
21985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    for(; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
22085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        newTrie->index1[i]=UNEWTRIE2_INDEX_2_NULL_OFFSET;
22185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
22285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
22385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /*
22485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * Preallocate and reset data for U+0080..U+07ff,
22585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * for 2-byte UTF-8 which will be compacted in 64-blocks
22685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * even if UTRIE2_DATA_BLOCK_LENGTH is smaller.
22785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     */
22885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    for(i=0x80; i<0x800; i+=UTRIE2_DATA_BLOCK_LENGTH) {
22985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        utrie2_set32(trie, i, initialValue, pErrorCode);
23085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
23185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
23285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    return trie;
23385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
23485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
23585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic UNewTrie2 *
23685bf2e2fbc60a9f938064abc8127d61da7d19882Claire HocloneBuilder(const UNewTrie2 *other) {
23785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    UNewTrie2 *trie;
23885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
23985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2));
24085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(trie==NULL) {
24185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return NULL;
24285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
24385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
24485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->data=(uint32_t *)uprv_malloc(other->dataCapacity*4);
24585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(trie->data==NULL) {
24685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        uprv_free(trie);
24785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return NULL;
24885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
24985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->dataCapacity=other->dataCapacity;
25085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
25185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* clone data */
25285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uprv_memcpy(trie->index1, other->index1, sizeof(trie->index1));
25385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uprv_memcpy(trie->index2, other->index2, other->index2Length*4);
25485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->index2NullOffset=other->index2NullOffset;
25585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->index2Length=other->index2Length;
25685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
25785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uprv_memcpy(trie->data, other->data, other->dataLength*4);
25885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->dataNullOffset=other->dataNullOffset;
25985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->dataLength=other->dataLength;
26085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
26185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* reference counters */
26285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(other->isCompacted) {
26385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        trie->firstFreeBlock=0;
26485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    } else {
26585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        uprv_memcpy(trie->map, other->map, (other->dataLength>>UTRIE2_SHIFT_2)*4);
26685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        trie->firstFreeBlock=other->firstFreeBlock;
26785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
26885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
26985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->initialValue=other->initialValue;
27085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->errorValue=other->errorValue;
27185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->highStart=other->highStart;
27285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->isCompacted=other->isCompacted;
27385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
27485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    return trie;
27585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
27685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
27785bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI UTrie2 * U_EXPORT2
27885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houtrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode) {
27985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    UTrie2 *trie;
28085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
28185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(U_FAILURE(*pErrorCode)) {
28285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return NULL;
28385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
28485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) {
28585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
28685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return NULL;
28785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
28885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
28985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
29085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(trie==NULL) {
29185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return NULL;
29285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
29385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uprv_memcpy(trie, other, sizeof(UTrie2));
29485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
29585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(other->memory!=NULL) {
29685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        trie->memory=uprv_malloc(other->length);
29785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(trie->memory!=NULL) {
29885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            trie->isMemoryOwned=TRUE;
29985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            uprv_memcpy(trie->memory, other->memory, other->length);
30085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
30185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            /* make the clone's pointers point to its own memory */
30285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            trie->index=(uint16_t *)trie->memory+(other->index-(uint16_t *)other->memory);
30385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            if(other->data16!=NULL) {
30485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                trie->data16=(uint16_t *)trie->memory+(other->data16-(uint16_t *)other->memory);
30585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            }
30685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            if(other->data32!=NULL) {
30785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                trie->data32=(uint32_t *)trie->memory+(other->data32-(uint32_t *)other->memory);
30885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            }
30985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
31085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    } else /* other->newTrie!=NULL */ {
31185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        trie->newTrie=cloneBuilder(other->newTrie);
31285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
31385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
31485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(trie->memory==NULL && trie->newTrie==NULL) {
31585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        uprv_free(trie);
31685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        trie=NULL;
31785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
31885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    return trie;
31985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
32085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
32185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hotypedef struct NewTrieAndStatus {
32285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    UTrie2 *trie;
32385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    UErrorCode errorCode;
32485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    UBool exclusiveLimit;  /* rather than inclusive range end */
32585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} NewTrieAndStatus;
32685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
32785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic UBool U_CALLCONV
32885bf2e2fbc60a9f938064abc8127d61da7d19882Claire HocopyEnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
32985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    NewTrieAndStatus *nt=(NewTrieAndStatus *)context;
33085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(value!=nt->trie->initialValue) {
33185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(nt->exclusiveLimit) {
33285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            --end;
33385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
33485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(start==end) {
33585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            utrie2_set32(nt->trie, start, value, &nt->errorCode);
33685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        } else {
33785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            utrie2_setRange32(nt->trie, start, end, value, TRUE, &nt->errorCode);
33885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
33985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return U_SUCCESS(nt->errorCode);
34085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    } else {
34185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return TRUE;
34285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
34385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
34485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
34585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#ifdef UTRIE2_DEBUG
34685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void
34785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houtrie_printLengths(const UTrie *trie) {
34885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    long indexLength=trie->indexLength;
34985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    long dataLength=(long)trie->dataLength;
35085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    long totalLength=(long)sizeof(UTrieHeader)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2);
35185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    printf("**UTrieLengths** index:%6ld  data:%6ld  serialized:%6ld\n",
35285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho           indexLength, dataLength, totalLength);
35385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
35485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
35585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void
35685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houtrie2_printLengths(const UTrie2 *trie, const char *which) {
35785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    long indexLength=trie->indexLength;
35885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    long dataLength=(long)trie->dataLength;
35985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    long totalLength=(long)sizeof(UTrie2Header)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2);
36085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    printf("**UTrie2Lengths(%s)** index:%6ld  data:%6ld  serialized:%6ld\n",
36185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho           which, indexLength, dataLength, totalLength);
36285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
36385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#endif
36485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
36585bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI UTrie2 * U_EXPORT2
36685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houtrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode) {
36785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    NewTrieAndStatus context;
36885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    UChar lead;
36985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
37085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(U_FAILURE(*pErrorCode)) {
37185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return NULL;
37285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
37385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) {
37485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
37585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return NULL;
37685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
37785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(other->newTrie!=NULL && !other->newTrie->isCompacted) {
37885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return utrie2_clone(other, pErrorCode);  /* clone an unfrozen trie */
37985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
38085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
38185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* Clone the frozen trie by enumerating it and building a new one. */
38285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    context.trie=utrie2_open(other->initialValue, other->errorValue, pErrorCode);
38385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(U_FAILURE(*pErrorCode)) {
38485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return NULL;
38585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
38685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    context.exclusiveLimit=FALSE;
38785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    context.errorCode=*pErrorCode;
38885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    utrie2_enum(other, NULL, copyEnumRange, &context);
38985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    *pErrorCode=context.errorCode;
39085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    for(lead=0xd800; lead<0xdc00; ++lead) {
39185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        uint32_t value;
39285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(other->data32==NULL) {
39385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            value=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(other, lead);
39485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        } else {
39585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            value=UTRIE2_GET32_FROM_U16_SINGLE_LEAD(other, lead);
39685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
39785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(value!=other->initialValue) {
39885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
39985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
40085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
40185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(U_FAILURE(*pErrorCode)) {
40285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        utrie2_close(context.trie);
40385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        context.trie=NULL;
40485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
40585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    return context.trie;
40685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
40785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
40885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* Almost the same as utrie2_cloneAsThawed() but copies a UTrie and freezes the clone. */
40985bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI UTrie2 * U_EXPORT2
41085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houtrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode) {
41185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    NewTrieAndStatus context;
41285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    UChar lead;
41385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
41485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(U_FAILURE(*pErrorCode)) {
41585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return NULL;
41685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
41785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(trie1==NULL) {
41885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
41985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return NULL;
42085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
42185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    context.trie=utrie2_open(trie1->initialValue, errorValue, pErrorCode);
42285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(U_FAILURE(*pErrorCode)) {
42385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return NULL;
42485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
42585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    context.exclusiveLimit=TRUE;
42685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    context.errorCode=*pErrorCode;
42785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    utrie_enum(trie1, NULL, copyEnumRange, &context);
42885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    *pErrorCode=context.errorCode;
42985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    for(lead=0xd800; lead<0xdc00; ++lead) {
43085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        uint32_t value;
43185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(trie1->data32==NULL) {
43285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            value=UTRIE_GET16_FROM_LEAD(trie1, lead);
43385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        } else {
43485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            value=UTRIE_GET32_FROM_LEAD(trie1, lead);
43585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
43685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(value!=trie1->initialValue) {
43785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
43885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
43985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
44085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(U_SUCCESS(*pErrorCode)) {
44185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        utrie2_freeze(context.trie,
44285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                      trie1->data32!=NULL ? UTRIE2_32_VALUE_BITS : UTRIE2_16_VALUE_BITS,
44385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                      pErrorCode);
44485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
44585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#ifdef UTRIE2_DEBUG
44685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(U_SUCCESS(*pErrorCode)) {
44785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        utrie_printLengths(trie1);
44885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        utrie2_printLengths(context.trie, "fromUTrie");
44985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
45085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#endif
45185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(U_FAILURE(*pErrorCode)) {
45285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        utrie2_close(context.trie);
45385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        context.trie=NULL;
45485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
45585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    return context.trie;
45685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
45785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
45885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic U_INLINE UBool
45985bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoisInNullBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
46085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t i2, block;
46185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
46285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(U_IS_LEAD(c) && forLSCP) {
46385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+
46485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            (c>>UTRIE2_SHIFT_2);
46585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    } else {
46685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        i2=trie->index1[c>>UTRIE2_SHIFT_1]+
46785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);
46885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
46985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    block=trie->index2[i2];
47085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    return (UBool)(block==trie->dataNullOffset);
47185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
47285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
47385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic int32_t
47485bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoallocIndex2Block(UNewTrie2 *trie) {
47585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t newBlock, newTop;
47685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
47785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    newBlock=trie->index2Length;
47885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    newTop=newBlock+UTRIE2_INDEX_2_BLOCK_LENGTH;
47985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(newTop>LENGTHOF(trie->index2)) {
48085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /*
48185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho         * Should never occur.
48285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho         * Either UTRIE2_MAX_BUILD_TIME_INDEX_LENGTH is incorrect,
48385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho         * or the code writes more values than should be possible.
48485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho         */
48585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return -1;
48685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
48785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->index2Length=newTop;
48885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uprv_memcpy(trie->index2+newBlock, trie->index2+trie->index2NullOffset, UTRIE2_INDEX_2_BLOCK_LENGTH*4);
48985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    return newBlock;
49085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
49185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
49285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic int32_t
49385bf2e2fbc60a9f938064abc8127d61da7d19882Claire HogetIndex2Block(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
49485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t i1, i2;
49585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
49685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(U_IS_LEAD(c) && forLSCP) {
49785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return UTRIE2_LSCP_INDEX_2_OFFSET;
49885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
49985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
50085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    i1=c>>UTRIE2_SHIFT_1;
50185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    i2=trie->index1[i1];
50285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(i2==trie->index2NullOffset) {
50385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        i2=allocIndex2Block(trie);
50485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(i2<0) {
50585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            return -1;  /* program error */
50685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
50785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        trie->index1[i1]=i2;
50885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
50985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    return i2;
51085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
51185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
51285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic int32_t
51385bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoallocDataBlock(UNewTrie2 *trie, int32_t copyBlock) {
51485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t newBlock, newTop;
51585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
51685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(trie->firstFreeBlock!=0) {
51785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /* get the first free block */
51885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        newBlock=trie->firstFreeBlock;
51985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        trie->firstFreeBlock=-trie->map[newBlock>>UTRIE2_SHIFT_2];
52085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    } else {
52185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /* get a new block from the high end */
52285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        newBlock=trie->dataLength;
52385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        newTop=newBlock+UTRIE2_DATA_BLOCK_LENGTH;
52485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(newTop>trie->dataCapacity) {
52585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            /* out of memory in the data array */
52685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            int32_t capacity;
52785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            uint32_t *data;
52885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
52985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            if(trie->dataCapacity<UNEWTRIE2_MEDIUM_DATA_LENGTH) {
53085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                capacity=UNEWTRIE2_MEDIUM_DATA_LENGTH;
53185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            } else if(trie->dataCapacity<UNEWTRIE2_MAX_DATA_LENGTH) {
53285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                capacity=UNEWTRIE2_MAX_DATA_LENGTH;
53385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            } else {
53485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                /*
53585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                 * Should never occur.
53685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                 * Either UNEWTRIE2_MAX_DATA_LENGTH is incorrect,
53785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                 * or the code writes more values than should be possible.
53885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                 */
53985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                return -1;
54085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            }
54185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            data=(uint32_t *)uprv_malloc(capacity*4);
54285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            if(data==NULL) {
54385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                return -1;
54485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            }
54585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            uprv_memcpy(data, trie->data, trie->dataLength*4);
54685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            uprv_free(trie->data);
54785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            trie->data=data;
54885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            trie->dataCapacity=capacity;
54985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
55085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        trie->dataLength=newTop;
55185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
55285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uprv_memcpy(trie->data+newBlock, trie->data+copyBlock, UTRIE2_DATA_BLOCK_LENGTH*4);
55385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->map[newBlock>>UTRIE2_SHIFT_2]=0;
55485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    return newBlock;
55585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
55685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
55785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* call when the block's reference counter reaches 0 */
55885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void
55985bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoreleaseDataBlock(UNewTrie2 *trie, int32_t block) {
56085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* put this block at the front of the free-block chain */
56185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->map[block>>UTRIE2_SHIFT_2]=-trie->firstFreeBlock;
56285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->firstFreeBlock=block;
56385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
56485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
56585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic U_INLINE UBool
56685bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoisWritableBlock(UNewTrie2 *trie, int32_t block) {
56785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    return (UBool)(block!=trie->dataNullOffset && 1==trie->map[block>>UTRIE2_SHIFT_2]);
56885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
56985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
57085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic U_INLINE void
57185bf2e2fbc60a9f938064abc8127d61da7d19882Claire HosetIndex2Entry(UNewTrie2 *trie, int32_t i2, int32_t block) {
57285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t oldBlock;
57385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    ++trie->map[block>>UTRIE2_SHIFT_2];  /* increment first, in case block==oldBlock! */
57485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    oldBlock=trie->index2[i2];
57585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(0 == --trie->map[oldBlock>>UTRIE2_SHIFT_2]) {
57685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        releaseDataBlock(trie, oldBlock);
57785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
57885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->index2[i2]=block;
57985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
58085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
58185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/**
58285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * No error checking for illegal arguments.
58385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *
58485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * @return -1 if no new data block available (out of memory in data array)
58585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * @internal
58685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */
58785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic int32_t
58885bf2e2fbc60a9f938064abc8127d61da7d19882Claire HogetDataBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
58985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t i2, oldBlock, newBlock;
59085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
59185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    i2=getIndex2Block(trie, c, forLSCP);
59285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(i2<0) {
59385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return -1;  /* program error */
59485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
59585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
59685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    i2+=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
59785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    oldBlock=trie->index2[i2];
59885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(isWritableBlock(trie, oldBlock)) {
59985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return oldBlock;
60085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
60185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
60285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* allocate a new data block */
60385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    newBlock=allocDataBlock(trie, oldBlock);
60485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(newBlock<0) {
60585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /* out of memory in the data array */
60685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return -1;
60785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
60885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    setIndex2Entry(trie, i2, newBlock);
60985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    return newBlock;
61085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
61185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
61285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/**
61385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * @return TRUE if the value was successfully set
61485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */
61585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void
61685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hoset32(UNewTrie2 *trie,
61785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho      UChar32 c, UBool forLSCP, uint32_t value,
61885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho      UErrorCode *pErrorCode) {
61985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t block;
62085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
62185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(trie==NULL || trie->isCompacted) {
62285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        *pErrorCode=U_NO_WRITE_PERMISSION;
62385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return;
62485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
62585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
62685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    block=getDataBlock(trie, c, forLSCP);
62785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(block<0) {
62885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
62985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return;
63085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
63185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
63285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->data[block+(c&UTRIE2_DATA_MASK)]=value;
63385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
63485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
63585bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI void U_EXPORT2
63685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houtrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) {
63785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(U_FAILURE(*pErrorCode)) {
63885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return;
63985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
64085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if((uint32_t)c>0x10ffff) {
64185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
64285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return;
64385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
64485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    set32(trie->newTrie, c, TRUE, value, pErrorCode);
64585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
64685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
64785bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI void U_EXPORT2
64885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houtrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie,
64985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                                     UChar32 c, uint32_t value,
65085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                                     UErrorCode *pErrorCode) {
65185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(U_FAILURE(*pErrorCode)) {
65285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return;
65385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
65485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(!U_IS_LEAD(c)) {
65585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
65685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return;
65785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
65885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    set32(trie->newTrie, c, FALSE, value, pErrorCode);
65985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
66085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
66185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void
66285bf2e2fbc60a9f938064abc8127d61da7d19882Claire HowriteBlock(uint32_t *block, uint32_t value) {
66385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uint32_t *limit=block+UTRIE2_DATA_BLOCK_LENGTH;
66485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    while(block<limit) {
66585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        *block++=value;
66685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
66785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
66885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
66985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/**
67085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * initialValue is ignored if overwrite=TRUE
67185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * @internal
67285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */
67385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void
67485bf2e2fbc60a9f938064abc8127d61da7d19882Claire HofillBlock(uint32_t *block, UChar32 start, UChar32 limit,
67585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho          uint32_t value, uint32_t initialValue, UBool overwrite) {
67685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uint32_t *pLimit;
67785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
67885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    pLimit=block+limit;
67985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    block+=start;
68085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(overwrite) {
68185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        while(block<pLimit) {
68285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            *block++=value;
68385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
68485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    } else {
68585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        while(block<pLimit) {
68685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            if(*block==initialValue) {
68785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                *block=value;
68885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            }
68985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            ++block;
69085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
69185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
69285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
69385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
69485bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI void U_EXPORT2
69585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houtrie2_setRange32(UTrie2 *trie,
69685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                  UChar32 start, UChar32 end,
69785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                  uint32_t value, UBool overwrite,
69885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                  UErrorCode *pErrorCode) {
69985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /*
70085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * repeat value in [start..end]
70185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * mark index values for repeat-data blocks by setting bit 31 of the index values
70285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * fill around existing values if any, if(overwrite)
70385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     */
70485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    UNewTrie2 *newTrie;
70585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t block, rest, repeatBlock;
70685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    UChar32 limit;
70785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
70885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(U_FAILURE(*pErrorCode)) {
70985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return;
71085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
71185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if((uint32_t)start>0x10ffff || (uint32_t)end>0x10ffff || start>end) {
71285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
71385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return;
71485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
71585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    newTrie=trie->newTrie;
71685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(newTrie==NULL || newTrie->isCompacted) {
71785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        *pErrorCode=U_NO_WRITE_PERMISSION;
71885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return;
71985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
72085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(!overwrite && value==newTrie->initialValue) {
72185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return; /* nothing to do */
72285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
72385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
72485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    limit=end+1;
72585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(start&UTRIE2_DATA_MASK) {
72685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        UChar32 nextStart;
72785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
72885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /* set partial block at [start..following block boundary[ */
72985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        block=getDataBlock(newTrie, start, TRUE);
73085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(block<0) {
73185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
73285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            return;
73385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
73485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
73585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        nextStart=(start+UTRIE2_DATA_BLOCK_LENGTH)&~UTRIE2_DATA_MASK;
73685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(nextStart<=limit) {
73785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, UTRIE2_DATA_BLOCK_LENGTH,
73885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                      value, newTrie->initialValue, overwrite);
73985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            start=nextStart;
74085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        } else {
74185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, limit&UTRIE2_DATA_MASK,
74285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                      value, newTrie->initialValue, overwrite);
74385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            return;
74485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
74585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
74685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
74785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* number of positions in the last, partial block */
74885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    rest=limit&UTRIE2_DATA_MASK;
74985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
75085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* round down limit to a block boundary */
75185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    limit&=~UTRIE2_DATA_MASK;
75285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
75385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* iterate over all-value blocks */
75485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(value==newTrie->initialValue) {
75585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        repeatBlock=newTrie->dataNullOffset;
75685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    } else {
75785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        repeatBlock=-1;
75885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
75985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
76085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    while(start<limit) {
76185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        int32_t i2;
76285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        UBool setRepeatBlock=FALSE;
76385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
76485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(value==newTrie->initialValue && isInNullBlock(newTrie, start, TRUE)) {
76585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            start+=UTRIE2_DATA_BLOCK_LENGTH; /* nothing to do */
76685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            continue;
76785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
76885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
76985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /* get index value */
77085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        i2=getIndex2Block(newTrie, start, TRUE);
77185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(i2<0) {
77285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
77385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            return;
77485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
77585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        i2+=(start>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
77685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        block=newTrie->index2[i2];
77785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(isWritableBlock(newTrie, block)) {
77885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            /* already allocated */
77985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            if(overwrite && block>=UNEWTRIE2_DATA_0800_OFFSET) {
78085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                /*
78185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                 * We overwrite all values, and it's not a
78285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                 * protected (ASCII-linear or 2-byte UTF-8) block:
78385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                 * replace with the repeatBlock.
78485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                 */
78585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                setRepeatBlock=TRUE;
78685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            } else {
78785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                /* !overwrite, or protected block: just write the values into this block */
78885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                fillBlock(newTrie->data+block,
78985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                          0, UTRIE2_DATA_BLOCK_LENGTH,
79085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                          value, newTrie->initialValue, overwrite);
79185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            }
79285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        } else if(newTrie->data[block]!=value && (overwrite || block==newTrie->dataNullOffset)) {
79385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            /*
79485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho             * Set the repeatBlock instead of the null block or previous repeat block:
79585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho             *
79685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho             * If !isWritableBlock() then all entries in the block have the same value
79785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho             * because it's the null block or a range block (the repeatBlock from a previous
79885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho             * call to utrie2_setRange32()).
79985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho             * No other blocks are used multiple times before compacting.
80085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho             *
80185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho             * The null block is the only non-writable block with the initialValue because
80285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho             * of the repeatBlock initialization above. (If value==initialValue, then
80385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho             * the repeatBlock will be the null data block.)
80485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho             *
80585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho             * We set our repeatBlock if the desired value differs from the block's value,
80685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho             * and if we overwrite any data or if the data is all initial values
80785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho             * (which is the same as the block being the null block, see above).
80885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho             */
80985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            setRepeatBlock=TRUE;
81085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
81185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(setRepeatBlock) {
81285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            if(repeatBlock>=0) {
81385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                setIndex2Entry(newTrie, i2, repeatBlock);
81485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            } else {
81585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                /* create and set and fill the repeatBlock */
81685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                repeatBlock=getDataBlock(newTrie, start, TRUE);
81785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                if(repeatBlock<0) {
81885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
81985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                    return;
82085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                }
82185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                writeBlock(newTrie->data+repeatBlock, value);
82285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            }
82385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
82485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
82585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        start+=UTRIE2_DATA_BLOCK_LENGTH;
82685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
82785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
82885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(rest>0) {
82985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /* set partial block at [last block boundary..limit[ */
83085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        block=getDataBlock(newTrie, start, TRUE);
83185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(block<0) {
83285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
83385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            return;
83485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
83585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
83685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        fillBlock(newTrie->data+block, 0, rest, value, newTrie->initialValue, overwrite);
83785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
83885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
83985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    return;
84085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
84185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
84285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* compaction --------------------------------------------------------------- */
84385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
84485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic U_INLINE UBool
84585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hoequal_int32(const int32_t *s, const int32_t *t, int32_t length) {
84685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    while(length>0 && *s==*t) {
84785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        ++s;
84885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        ++t;
84985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        --length;
85085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
85185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    return (UBool)(length==0);
85285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
85385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
85485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic U_INLINE UBool
85585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hoequal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) {
85685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    while(length>0 && *s==*t) {
85785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        ++s;
85885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        ++t;
85985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        --length;
86085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
86185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    return (UBool)(length==0);
86285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
86385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
86485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic int32_t
86585bf2e2fbc60a9f938064abc8127d61da7d19882Claire HofindSameIndex2Block(const int32_t *idx, int32_t index2Length, int32_t otherBlock) {
86685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t block;
86785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
86885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* ensure that we do not even partially get past index2Length */
86985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    index2Length-=UTRIE2_INDEX_2_BLOCK_LENGTH;
87085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
87185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    for(block=0; block<=index2Length; ++block) {
87285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(equal_int32(idx+block, idx+otherBlock, UTRIE2_INDEX_2_BLOCK_LENGTH)) {
87385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            return block;
87485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
87585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
87685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    return -1;
87785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
87885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
87985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic int32_t
88085bf2e2fbc60a9f938064abc8127d61da7d19882Claire HofindSameDataBlock(const uint32_t *data, int32_t dataLength, int32_t otherBlock, int32_t blockLength) {
88185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t block;
88285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
88385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* ensure that we do not even partially get past dataLength */
88485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    dataLength-=blockLength;
88585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
88685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    for(block=0; block<=dataLength; block+=UTRIE2_DATA_GRANULARITY) {
88785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(equal_uint32(data+block, data+otherBlock, blockLength)) {
88885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            return block;
88985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
89085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
89185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    return -1;
89285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
89385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
89485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/*
89585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Find the start of the last range in the trie by enumerating backward.
89685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Indexes for supplementary code points higher than this will be omitted.
89785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */
89885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic UChar32
89985bf2e2fbc60a9f938064abc8127d61da7d19882Claire HofindHighStart(UNewTrie2 *trie, uint32_t highValue) {
90085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    const uint32_t *data32;
90185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
90285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uint32_t value, initialValue;
90385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    UChar32 c, prev;
90485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t i1, i2, j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock;
90585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
90685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    data32=trie->data;
90785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    initialValue=trie->initialValue;
90885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
90985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    index2NullOffset=trie->index2NullOffset;
91085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    nullBlock=trie->dataNullOffset;
91185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
91285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* set variables for previous range */
91385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(highValue==initialValue) {
91485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        prevI2Block=index2NullOffset;
91585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        prevBlock=nullBlock;
91685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    } else {
91785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        prevI2Block=-1;
91885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        prevBlock=-1;
91985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
92085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    prev=0x110000;
92185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
92285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* enumerate index-2 blocks */
92385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    i1=UNEWTRIE2_INDEX_1_LENGTH;
92485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    c=prev;
92585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    while(c>0) {
92685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        i2Block=trie->index1[--i1];
92785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(i2Block==prevI2Block) {
92885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            /* the index-2 block is the same as the previous one, and filled with highValue */
92985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
93085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            continue;
93185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
93285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        prevI2Block=i2Block;
93385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(i2Block==index2NullOffset) {
93485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            /* this is the null index-2 block */
93585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            if(highValue!=initialValue) {
93685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                return c;
93785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            }
93885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
93985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        } else {
94085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            /* enumerate data blocks for one index-2 block */
94185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            for(i2=UTRIE2_INDEX_2_BLOCK_LENGTH; i2>0;) {
94285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                block=trie->index2[i2Block+ --i2];
94385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                if(block==prevBlock) {
94485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                    /* the block is the same as the previous one, and filled with highValue */
94585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                    c-=UTRIE2_DATA_BLOCK_LENGTH;
94685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                    continue;
94785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                }
94885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                prevBlock=block;
94985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                if(block==nullBlock) {
95085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                    /* this is the null data block */
95185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                    if(highValue!=initialValue) {
95285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                        return c;
95385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                    }
95485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                    c-=UTRIE2_DATA_BLOCK_LENGTH;
95585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                } else {
95685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                    for(j=UTRIE2_DATA_BLOCK_LENGTH; j>0;) {
95785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                        value=data32[block+ --j];
95885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                        if(value!=highValue) {
95985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                            return c;
96085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                        }
96185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                        --c;
96285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                    }
96385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                }
96485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            }
96585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
96685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
96785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
96885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* deliver last range */
96985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    return 0;
97085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
97185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
97285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/*
97385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Compact a build-time trie.
97485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *
97585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * The compaction
97685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - removes blocks that are identical with earlier ones
97785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - overlaps adjacent blocks as much as possible (if overlap==TRUE)
97885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - moves blocks in steps of the data granularity
97985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - moves and overlaps blocks that overlap with multiple values in the overlap region
98085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *
98185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * It does not
98285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - try to move and overlap blocks that are not already adjacent
98385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */
98485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void
98585bf2e2fbc60a9f938064abc8127d61da7d19882Claire HocompactData(UNewTrie2 *trie) {
98685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t start, newStart, movedStart;
98785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t blockLength, overlap;
98885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t i, mapIndex, blockCount;
98985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
99085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* do not compact linear-ASCII data */
99185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    newStart=UTRIE2_DATA_START_OFFSET;
99285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    for(start=0, i=0; start<newStart; start+=UTRIE2_DATA_BLOCK_LENGTH, ++i) {
99385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        trie->map[i]=start;
99485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
99585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
99685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /*
99785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * Start with a block length of 64 for 2-byte UTF-8,
99885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * then switch to UTRIE2_DATA_BLOCK_LENGTH.
99985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     */
100085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    blockLength=64;
100185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    blockCount=blockLength>>UTRIE2_SHIFT_2;
100285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    for(start=newStart; start<trie->dataLength;) {
100385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /*
100485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho         * start: index of first entry of current block
100585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho         * newStart: index where the current block is to be moved
100685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho         *           (right after current end of already-compacted data)
100785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho         */
100885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(start==UNEWTRIE2_DATA_0800_OFFSET) {
100985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            blockLength=UTRIE2_DATA_BLOCK_LENGTH;
101085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            blockCount=1;
101185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
101285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
101385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /* skip blocks that are not used */
101485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(trie->map[start>>UTRIE2_SHIFT_2]<=0) {
101585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            /* advance start to the next block */
101685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            start+=blockLength;
101785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
101885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            /* leave newStart with the previous block! */
101985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            continue;
102085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
102185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
102285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /* search for an identical block */
102385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if( (movedStart=findSameDataBlock(trie->data, newStart, start, blockLength))
102485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho             >=0
102585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        ) {
102685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            /* found an identical block, set the other block's index value for the current block */
102785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
102885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                trie->map[mapIndex++]=movedStart;
102985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
103085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            }
103185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
103285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            /* advance start to the next block */
103385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            start+=blockLength;
103485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
103585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            /* leave newStart with the previous block! */
103685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            continue;
103785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
103885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
103985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /* see if the beginning of this block can be overlapped with the end of the previous block */
104085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
104185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        for(overlap=blockLength-UTRIE2_DATA_GRANULARITY;
104285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            overlap>0 && !equal_uint32(trie->data+(newStart-overlap), trie->data+start, overlap);
104385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            overlap-=UTRIE2_DATA_GRANULARITY) {}
104485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
104585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(overlap>0 || newStart<start) {
104685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            /* some overlap, or just move the whole block */
104785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            movedStart=newStart-overlap;
104885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
104985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                trie->map[mapIndex++]=movedStart;
105085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
105185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            }
105285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
105385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            /* move the non-overlapping indexes to their new positions */
105485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            start+=overlap;
105585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            for(i=blockLength-overlap; i>0; --i) {
105685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                trie->data[newStart++]=trie->data[start++];
105785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            }
105885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        } else /* no overlap && newStart==start */ {
105985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
106085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                trie->map[mapIndex++]=start;
106185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                start+=UTRIE2_DATA_BLOCK_LENGTH;
106285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            }
106385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            newStart=start;
106485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
106585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
106685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
106785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* now adjust the index-2 table */
106885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    for(i=0; i<trie->index2Length; ++i) {
106985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(i==UNEWTRIE2_INDEX_GAP_OFFSET) {
107085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            /* Gap indexes are invalid (-1). Skip over the gap. */
107185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            i+=UNEWTRIE2_INDEX_GAP_LENGTH;
107285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
107385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        trie->index2[i]=trie->map[trie->index2[i]>>UTRIE2_SHIFT_2];
107485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
107585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->dataNullOffset=trie->map[trie->dataNullOffset>>UTRIE2_SHIFT_2];
107685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
107785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* ensure dataLength alignment */
107885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    while((newStart&(UTRIE2_DATA_GRANULARITY-1))!=0) {
107985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        trie->data[newStart++]=trie->initialValue;
108085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
108185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
108285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#ifdef UTRIE2_DEBUG
108385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* we saved some space */
108485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    printf("compacting UTrie2: count of 32-bit data words %lu->%lu\n",
108585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            (long)trie->dataLength, (long)newStart);
108685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#endif
108785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
108885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->dataLength=newStart;
108985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
109085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
109185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void
109285bf2e2fbc60a9f938064abc8127d61da7d19882Claire HocompactIndex2(UNewTrie2 *trie) {
109385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t i, start, newStart, movedStart, overlap;
109485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
109585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* do not compact linear-BMP index-2 blocks */
109685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    newStart=UTRIE2_INDEX_2_BMP_LENGTH;
109785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    for(start=0, i=0; start<newStart; start+=UTRIE2_INDEX_2_BLOCK_LENGTH, ++i) {
109885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        trie->map[i]=start;
109985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
110085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
110185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* Reduce the index table gap to what will be needed at runtime. */
110285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    newStart+=UTRIE2_UTF8_2B_INDEX_2_LENGTH+((trie->highStart-0x10000)>>UTRIE2_SHIFT_1);
110385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
110485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    for(start=UNEWTRIE2_INDEX_2_NULL_OFFSET; start<trie->index2Length;) {
110585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /*
110685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho         * start: index of first entry of current block
110785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho         * newStart: index where the current block is to be moved
110885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho         *           (right after current end of already-compacted data)
110985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho         */
111085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
111185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /* search for an identical block */
111285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if( (movedStart=findSameIndex2Block(trie->index2, newStart, start))
111385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho             >=0
111485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        ) {
111585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            /* found an identical block, set the other block's index value for the current block */
111685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            trie->map[start>>UTRIE2_SHIFT_1_2]=movedStart;
111785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
111885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            /* advance start to the next block */
111985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
112085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
112185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            /* leave newStart with the previous block! */
112285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            continue;
112385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
112485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
112585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /* see if the beginning of this block can be overlapped with the end of the previous block */
112685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /* look for maximum overlap with the previous, adjacent block */
112785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        for(overlap=UTRIE2_INDEX_2_BLOCK_LENGTH-1;
112885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            overlap>0 && !equal_int32(trie->index2+(newStart-overlap), trie->index2+start, overlap);
112985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            --overlap) {}
113085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
113185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(overlap>0 || newStart<start) {
113285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            /* some overlap, or just move the whole block */
113385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            trie->map[start>>UTRIE2_SHIFT_1_2]=newStart-overlap;
113485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
113585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            /* move the non-overlapping indexes to their new positions */
113685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            start+=overlap;
113785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            for(i=UTRIE2_INDEX_2_BLOCK_LENGTH-overlap; i>0; --i) {
113885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                trie->index2[newStart++]=trie->index2[start++];
113985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            }
114085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        } else /* no overlap && newStart==start */ {
114185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            trie->map[start>>UTRIE2_SHIFT_1_2]=start;
114285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
114385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            newStart=start;
114485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
114585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
114685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
114785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* now adjust the index-1 table */
114885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    for(i=0; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
114985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        trie->index1[i]=trie->map[trie->index1[i]>>UTRIE2_SHIFT_1_2];
115085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
115185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->index2NullOffset=trie->map[trie->index2NullOffset>>UTRIE2_SHIFT_1_2];
115285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
115385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /*
115485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * Ensure data table alignment:
115585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * Needs to be granularity-aligned for 16-bit trie
115685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * (so that dataMove will be down-shiftable),
115785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * and 2-aligned for uint32_t data.
115885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     */
115985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    while((newStart&((UTRIE2_DATA_GRANULARITY-1)|1))!=0) {
116085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /* Arbitrary value: 0x3fffc not possible for real data. */
116185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        trie->index2[newStart++]=(int32_t)0xffff<<UTRIE2_INDEX_SHIFT;
116285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
116385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
116485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#ifdef UTRIE2_DEBUG
116585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* we saved some space */
116685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    printf("compacting UTrie2: count of 16-bit index-2 words %lu->%lu\n",
116785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            (long)trie->index2Length, (long)newStart);
116885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#endif
116985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
117085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->index2Length=newStart;
117185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
117285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
117385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void
117485bf2e2fbc60a9f938064abc8127d61da7d19882Claire HocompactTrie(UTrie2 *trie, UErrorCode *pErrorCode) {
117585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    UNewTrie2 *newTrie;
117685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    UChar32 highStart, suppHighStart;
117785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uint32_t highValue;
117885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
117985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    newTrie=trie->newTrie;
118085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
118185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* find highStart and round it up */
118285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    highValue=utrie2_get32(trie, 0x10ffff);
118385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    highStart=findHighStart(newTrie, highValue);
118485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    highStart=(highStart+(UTRIE2_CP_PER_INDEX_1_ENTRY-1))&~(UTRIE2_CP_PER_INDEX_1_ENTRY-1);
118585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(highStart==0x110000) {
118685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        highValue=trie->errorValue;
118785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
118885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
118985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /*
119085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * Set trie->highStart only after utrie2_get32(trie, highStart).
119185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * Otherwise utrie2_get32(trie, highStart) would try to read the highValue.
119285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     */
119385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->highStart=newTrie->highStart=highStart;
119485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
119585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#ifdef UTRIE2_DEBUG
119685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    printf("UTrie2: highStart U+%04lx  highValue 0x%lx  initialValue 0x%lx\n",
119785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            (long)highStart, (long)highValue, (long)trie->initialValue);
119885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#endif
119985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
120085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(highStart<0x110000) {
120185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /* Blank out [highStart..10ffff] to release associated data blocks. */
120285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        suppHighStart= highStart<=0x10000 ? 0x10000 : highStart;
120385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        utrie2_setRange32(trie, suppHighStart, 0x10ffff, trie->initialValue, TRUE, pErrorCode);
120485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(U_FAILURE(*pErrorCode)) {
120585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            return;
120685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
120785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
120885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
120985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    compactData(newTrie);
121085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(highStart>0x10000) {
121185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        compactIndex2(newTrie);
121285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#ifdef UTRIE2_DEBUG
121385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    } else {
121485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        printf("UTrie2: highStart U+%04lx  count of 16-bit index-2 words %lu->%lu\n",
121585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                (long)highStart, (long)trie->newTrie->index2Length, (long)UTRIE2_INDEX_1_OFFSET);
121685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#endif
121785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
121885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
121985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /*
122085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * Store the highValue in the data array and round up the dataLength.
122185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * Must be done after compactData() because that assumes that dataLength
122285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     * is a multiple of UTRIE2_DATA_BLOCK_LENGTH.
122385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho     */
122485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    newTrie->data[newTrie->dataLength++]=highValue;
122585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    while((newTrie->dataLength&(UTRIE2_DATA_GRANULARITY-1))!=0) {
122685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        newTrie->data[newTrie->dataLength++]=trie->initialValue;
122785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
122885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
122985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    newTrie->isCompacted=TRUE;
123085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
123185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
123285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* serialization ------------------------------------------------------------ */
123385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
123485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/**
123585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Maximum length of the runtime index array.
123685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Limited by its own 16-bit index values, and by uint16_t UTrie2Header.indexLength.
123785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * (The actual maximum length is lower,
123885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * (0x110000>>UTRIE2_SHIFT_2)+UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH.)
123985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */
124085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#define UTRIE2_MAX_INDEX_LENGTH 0xffff
124185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
124285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/**
124385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Maximum length of the runtime data array.
124485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Limited by 16-bit index values that are left-shifted by UTRIE2_INDEX_SHIFT,
124585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * and by uint16_t UTrie2Header.shiftedDataLength.
124685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */
124785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#define UTRIE2_MAX_DATA_LENGTH (0xffff<<UTRIE2_INDEX_SHIFT)
124885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
124985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* Compact and internally serialize the trie. */
125085bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI void U_EXPORT2
125185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houtrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode) {
125285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    UNewTrie2 *newTrie;
125385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    UTrie2Header *header;
125485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uint32_t *p;
125585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uint16_t *dest16;
125685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t i, length;
125785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t allIndexesLength;
125885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t dataMove;  /* >0 if the data is moved to the end of the index array */
125985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    UChar32 highStart;
126085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
126185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* argument check */
126285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(U_FAILURE(*pErrorCode)) {
126385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return;
126485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
126585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if( trie==NULL ||
126685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits
126785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    ) {
126885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
126985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return;
127085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
127185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    newTrie=trie->newTrie;
127285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(newTrie==NULL) {
127385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /* already frozen */
127485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        UTrie2ValueBits frozenValueBits=
127585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            trie->data16!=NULL ? UTRIE2_16_VALUE_BITS : UTRIE2_32_VALUE_BITS;
127685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(valueBits!=frozenValueBits) {
127785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
127885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
127985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return;
128085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
128185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
128285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* compact if necessary */
128385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(!newTrie->isCompacted) {
128485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        compactTrie(trie, pErrorCode);
128585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(U_FAILURE(*pErrorCode)) {
128685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            return;
128785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
128885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
128985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    highStart=trie->highStart;
129085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
129185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(highStart<=0x10000) {
129285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        allIndexesLength=UTRIE2_INDEX_1_OFFSET;
129385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    } else {
129485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        allIndexesLength=newTrie->index2Length;
129585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
129685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(valueBits==UTRIE2_16_VALUE_BITS) {
129785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        dataMove=allIndexesLength;
129885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    } else {
129985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        dataMove=0;
130085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
130185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
130285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* are indexLength and dataLength within limits? */
130385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if( /* for unshifted indexLength */
130485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        allIndexesLength>UTRIE2_MAX_INDEX_LENGTH ||
130585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /* for unshifted dataNullOffset */
130685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        (dataMove+newTrie->dataNullOffset)>0xffff ||
130785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /* for unshifted 2-byte UTF-8 index-2 values */
130885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        (dataMove+UNEWTRIE2_DATA_0800_OFFSET)>0xffff ||
130985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /* for shiftedDataLength */
131085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        (dataMove+newTrie->dataLength)>UTRIE2_MAX_DATA_LENGTH
131185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    ) {
131285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
131385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return;
131485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
131585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
131685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* calculate the total serialized length */
131785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    length=sizeof(UTrie2Header)+allIndexesLength*2;
131885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(valueBits==UTRIE2_16_VALUE_BITS) {
131985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        length+=newTrie->dataLength*2;
132085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    } else {
132185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        length+=newTrie->dataLength*4;
132285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
132385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
132485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->memory=uprv_malloc(length);
132585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(trie->memory==NULL) {
132685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
132785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return;
132885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
132985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->length=length;
133085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->isMemoryOwned=TRUE;
133185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
133285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->indexLength=allIndexesLength;
133385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->dataLength=newTrie->dataLength;
133485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(highStart<=0x10000) {
133585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        trie->index2NullOffset=0xffff;
133685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    } else {
133785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET+newTrie->index2NullOffset;
133885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
133985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->dataNullOffset=(uint16_t)(dataMove+newTrie->dataNullOffset);
134085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->highValueIndex=dataMove+trie->dataLength-UTRIE2_DATA_GRANULARITY;
134185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
134285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* set the header fields */
134385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    header=(UTrie2Header *)trie->memory;
134485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
134585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    header->signature=UTRIE2_SIG; /* "Tri2" */
134685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    header->options=(uint16_t)valueBits;
134785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
134885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    header->indexLength=(uint16_t)trie->indexLength;
134985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    header->shiftedDataLength=(uint16_t)(trie->dataLength>>UTRIE2_INDEX_SHIFT);
135085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    header->index2NullOffset=trie->index2NullOffset;
135185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    header->dataNullOffset=trie->dataNullOffset;
135285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    header->shiftedHighStart=(uint16_t)(highStart>>UTRIE2_SHIFT_1);
135385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
135485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* fill the index and data arrays */
135585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    dest16=(uint16_t *)(header+1);
135685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->index=dest16;
135785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
135885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove */
135985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    p=(uint32_t *)newTrie->index2;
136085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    for(i=UTRIE2_INDEX_2_BMP_LENGTH; i>0; --i) {
136185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);
136285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
136385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
136485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* write UTF-8 2-byte index-2 values, not right-shifted */
136585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    for(i=0; i<(0xc2-0xc0); ++i) {                                  /* C0..C1 */
136685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET);
136785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
136885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    for(; i<(0xe0-0xc0); ++i) {                                     /* C2..DF */
136985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        *dest16++=(uint16_t)(dataMove+newTrie->index2[i<<(6-UTRIE2_SHIFT_2)]);
137085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
137185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
137285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(highStart>0x10000) {
137385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        int32_t index1Length=(highStart-0x10000)>>UTRIE2_SHIFT_1;
137485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        int32_t index2Offset=UTRIE2_INDEX_2_BMP_LENGTH+UTRIE2_UTF8_2B_INDEX_2_LENGTH+index1Length;
137585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
137685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /* write 16-bit index-1 values for supplementary code points */
137785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        p=(uint32_t *)newTrie->index1+UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
137885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        for(i=index1Length; i>0; --i) {
137985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            *dest16++=(uint16_t)(UTRIE2_INDEX_2_OFFSET + *p++);
138085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
138185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
138285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /*
138385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho         * write the index-2 array values for supplementary code points,
138485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho         * shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove
138585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho         */
138685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        p=(uint32_t *)newTrie->index2+index2Offset;
138785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        for(i=newTrie->index2Length-index2Offset; i>0; --i) {
138885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);
138985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
139085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
139185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
139285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* write the 16/32-bit data array */
139385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    switch(valueBits) {
139485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    case UTRIE2_16_VALUE_BITS:
139585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /* write 16-bit data values */
139685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        trie->data16=dest16;
139785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        trie->data32=NULL;
139885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        p=newTrie->data;
139985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        for(i=newTrie->dataLength; i>0; --i) {
140085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            *dest16++=(uint16_t)*p++;
140185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
140285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        break;
140385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    case UTRIE2_32_VALUE_BITS:
140485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        /* write 32-bit data values */
140585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        trie->data16=NULL;
140685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        trie->data32=(uint32_t *)dest16;
140785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        uprv_memcpy(dest16, newTrie->data, newTrie->dataLength*4);
140885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        break;
140985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    default:
141085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
141185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return;
141285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
141385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
141485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* Delete the UNewTrie2. */
141585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uprv_free(newTrie->data);
141685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uprv_free(newTrie);
141785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    trie->newTrie=NULL;
141885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
141985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
142085bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI UBool U_EXPORT2
142185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houtrie2_isFrozen(const UTrie2 *trie) {
142285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    return (UBool)(trie->newTrie==NULL);
142385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
142485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
142585bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI int32_t U_EXPORT2
142685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houtrie2_serialize(UTrie2 *trie,
142785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                 void *data, int32_t capacity,
142885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                 UErrorCode *pErrorCode) {
142985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    /* argument check */
143085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(U_FAILURE(*pErrorCode)) {
143185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return 0;
143285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
143385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
143485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if( trie==NULL || trie->memory==NULL || trie->newTrie!=NULL ||
143585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        capacity<0 || (capacity>0 && (data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)))
143685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    ) {
143785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
143885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return 0;
143985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
144085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
144185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(capacity>=trie->length) {
144285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        uprv_memcpy(data, trie->memory, trie->length);
144385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    } else {
144485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
144585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
144685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    return trie->length;
144785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}
144827f654740f2a26ad62a5c155af9199af9e69b889claireho
144927f654740f2a26ad62a5c155af9199af9e69b889claireho/*
145027f654740f2a26ad62a5c155af9199af9e69b889claireho * This is here to avoid a dependency from utrie2.cpp on utrie.c.
145127f654740f2a26ad62a5c155af9199af9e69b889claireho * This file already depends on utrie.c.
145227f654740f2a26ad62a5c155af9199af9e69b889claireho * Otherwise, this should be in utrie2.cpp right after utrie2_swap().
145327f654740f2a26ad62a5c155af9199af9e69b889claireho */
145427f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CAPI int32_t U_EXPORT2
145527f654740f2a26ad62a5c155af9199af9e69b889clairehoutrie2_swapAnyVersion(const UDataSwapper *ds,
145627f654740f2a26ad62a5c155af9199af9e69b889claireho                      const void *inData, int32_t length, void *outData,
145727f654740f2a26ad62a5c155af9199af9e69b889claireho                      UErrorCode *pErrorCode) {
145827f654740f2a26ad62a5c155af9199af9e69b889claireho    if(U_SUCCESS(*pErrorCode)) {
145927f654740f2a26ad62a5c155af9199af9e69b889claireho        switch(utrie2_getVersion(inData, length, TRUE)) {
146027f654740f2a26ad62a5c155af9199af9e69b889claireho        case 1:
146127f654740f2a26ad62a5c155af9199af9e69b889claireho            return utrie_swap(ds, inData, length, outData, pErrorCode);
146227f654740f2a26ad62a5c155af9199af9e69b889claireho        case 2:
146327f654740f2a26ad62a5c155af9199af9e69b889claireho            return utrie2_swap(ds, inData, length, outData, pErrorCode);
146427f654740f2a26ad62a5c155af9199af9e69b889claireho        default:
146527f654740f2a26ad62a5c155af9199af9e69b889claireho            *pErrorCode=U_INVALID_FORMAT_ERROR;
146627f654740f2a26ad62a5c155af9199af9e69b889claireho            return 0;
146727f654740f2a26ad62a5c155af9199af9e69b889claireho        }
146827f654740f2a26ad62a5c155af9199af9e69b889claireho    }
146927f654740f2a26ad62a5c155af9199af9e69b889claireho    return 0;
147027f654740f2a26ad62a5c155af9199af9e69b889claireho}
1471