1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)****************************************************************************** 3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Copyright (C) 2001-2010, International Business Machines 5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Corporation and others. All Rights Reserved. 6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)****************************************************************************** 8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* file name: utrie2_builder.c 9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* encoding: US-ASCII 10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* tab size: 8 (not used) 11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* indentation:4 12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* created on: 2008sep26 (split off from utrie2.c) 14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* created by: Markus W. Scherer 15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* This is a common implementation of a Unicode trie. 17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* It is a kind of compressed, serializable table of 16- or 32-bit values associated with 18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Unicode code points (0..0x10ffff). 19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* This is the second common version of a Unicode trie (hence the name UTrie2). 20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* See utrie2.h for a comparison. 21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* This file contains only the builder code. 23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* See utrie2.c for the runtime and enumeration code. 24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE2_DEBUG 26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)# include <stdio.h> 27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h" 30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cmemory.h" 31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "utrie2.h" 32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "utrie2_impl.h" 33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "utrie.h" /* for utrie2_fromUTrie() and utrie_swap() */ 35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Implementation notes ----------------------------------------------------- */ 39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The UTRIE2_SHIFT_1, UTRIE2_SHIFT_2, UTRIE2_INDEX_SHIFT and other values 42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * have been chosen to minimize trie sizes overall. 43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Most of the code is flexible enough to work with a range of values, 44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * within certain limits. 45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Exception: Support for separate values for lead surrogate code _units_ 47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * vs. code _points_ was added after the constants were fixed, 48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and has not been tested nor particularly designed for different constant values. 49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * (Especially the utrie2_enum() code that jumps to the special LSCP index-2 50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * part and back.) 51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Requires UTRIE2_SHIFT_2<=6. Otherwise 0xc0 which is the top of the ASCII-linear data 53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * including the bad-UTF-8-data block is not a multiple of UTRIE2_DATA_BLOCK_LENGTH 54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and map[block>>UTRIE2_SHIFT_2] (used in reference counting and compaction 55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * remapping) stops working. 56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Requires UTRIE2_SHIFT_1>=10 because utrie2_enumForLeadSurrogate() 58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * assumes that a single index-2 block is used for 0x400 code points 59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * corresponding to one lead surrogate. 60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Requires UTRIE2_SHIFT_1<=16. Otherwise one single index-2 block contains 62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * more than one Unicode plane, and the split of the index-2 table into a BMP 63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * part and a supplementary part, with a gap in between, would not work. 64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Requires UTRIE2_INDEX_SHIFT>=1 not because of the code but because 66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * there is data with more than 64k distinct values, 67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * for example for Unihan collation with a separate collation weight per 68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Han character. 69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Building a trie ----------------------------------------------------------*/ 72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)enum { 74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** The null index-2 block, following the gap in the index-2 table. */ 75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UNEWTRIE2_INDEX_2_NULL_OFFSET=UNEWTRIE2_INDEX_GAP_OFFSET+UNEWTRIE2_INDEX_GAP_LENGTH, 76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** The start of allocated index-2 blocks. */ 78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UNEWTRIE2_INDEX_2_START_OFFSET=UNEWTRIE2_INDEX_2_NULL_OFFSET+UTRIE2_INDEX_2_BLOCK_LENGTH, 79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The null data block. 82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Length 64=0x40 even if UTRIE2_DATA_BLOCK_LENGTH is smaller, 83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * to work with 6-bit trail bytes from 2-byte UTF-8. 84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UNEWTRIE2_DATA_NULL_OFFSET=UTRIE2_DATA_START_OFFSET, 86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** The start of allocated data blocks. */ 88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UNEWTRIE2_DATA_START_OFFSET=UNEWTRIE2_DATA_NULL_OFFSET+0x40, 89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /** 91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The start of data blocks for U+0800 and above. 92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Below, compaction uses a block length of 64 for 2-byte UTF-8. 93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * From here on, compaction uses UTRIE2_DATA_BLOCK_LENGTH. 94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Data values for 0x780 code points beyond ASCII. 95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UNEWTRIE2_DATA_0800_OFFSET=UNEWTRIE2_DATA_START_OFFSET+0x780 97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Start with allocation of 16k data entries. */ 100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UNEWTRIE2_INITIAL_DATA_LENGTH ((int32_t)1<<14) 101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Grow about 8x each time. */ 103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UNEWTRIE2_MEDIUM_DATA_LENGTH ((int32_t)1<<17) 104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t 106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)allocIndex2Block(UNewTrie2 *trie); 107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UTrie2 * U_EXPORT2 109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) { 110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTrie2 *trie; 111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UNewTrie2 *newTrie; 112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t *data; 113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i, j; 114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*pErrorCode)) { 116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); 120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2)); 121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) data=(uint32_t *)uprv_malloc(UNEWTRIE2_INITIAL_DATA_LENGTH*4); 122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie==NULL || newTrie==NULL || data==NULL) { 123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(trie); 124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(newTrie); 125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(data); 126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memset(trie, 0, sizeof(UTrie2)); 131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->initialValue=initialValue; 132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->errorValue=errorValue; 133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->highStart=0x110000; 134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->newTrie=newTrie; 135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->data=data; 137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->dataCapacity=UNEWTRIE2_INITIAL_DATA_LENGTH; 138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->initialValue=initialValue; 139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->errorValue=errorValue; 140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->highStart=0x110000; 141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->firstFreeBlock=0; /* no free block in the list */ 142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->isCompacted=FALSE; 143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * preallocate and reset 146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - ASCII 147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - the bad-UTF-8-data block 148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - the null data block 149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=0; i<0x80; ++i) { 151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->data[i]=initialValue; 152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(; i<0xc0; ++i) { 154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->data[i]=errorValue; 155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=UNEWTRIE2_DATA_NULL_OFFSET; i<UNEWTRIE2_DATA_START_OFFSET; ++i) { 157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->data[i]=initialValue; 158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->dataNullOffset=UNEWTRIE2_DATA_NULL_OFFSET; 160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->dataLength=UNEWTRIE2_DATA_START_OFFSET; 161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* set the index-2 indexes for the 2=0x80>>UTRIE2_SHIFT_2 ASCII data blocks */ 163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=0, j=0; j<0x80; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) { 164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->index2[i]=j; 165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->map[i]=1; 166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* reference counts for the bad-UTF-8-data block */ 168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(; j<0xc0; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) { 169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->map[i]=0; 170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Reference counts for the null data block: all blocks except for the ASCII blocks. 173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Plus 1 so that we don't drop this block during compaction. 174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Plus as many as needed for lead surrogate code points. 175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* i==newTrie->dataNullOffset */ 177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->map[i++]= 178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (0x110000>>UTRIE2_SHIFT_2)- 179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (0x80>>UTRIE2_SHIFT_2)+ 180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1+ 181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTRIE2_LSCP_INDEX_2_LENGTH; 182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) j+=UTRIE2_DATA_BLOCK_LENGTH; 183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(; j<UNEWTRIE2_DATA_START_OFFSET; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) { 184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->map[i]=0; 185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * set the remaining indexes in the BMP index-2 block 189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * to the null data block 190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=0x80>>UTRIE2_SHIFT_2; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) { 192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->index2[i]=UNEWTRIE2_DATA_NULL_OFFSET; 193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Fill the index gap with impossible values so that compaction 197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * does not overlap other index-2 blocks with the gap. 198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=0; i<UNEWTRIE2_INDEX_GAP_LENGTH; ++i) { 200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->index2[UNEWTRIE2_INDEX_GAP_OFFSET+i]=-1; 201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* set the indexes in the null index-2 block */ 204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=0; i<UTRIE2_INDEX_2_BLOCK_LENGTH; ++i) { 205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->index2[UNEWTRIE2_INDEX_2_NULL_OFFSET+i]=UNEWTRIE2_DATA_NULL_OFFSET; 206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->index2NullOffset=UNEWTRIE2_INDEX_2_NULL_OFFSET; 208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->index2Length=UNEWTRIE2_INDEX_2_START_OFFSET; 209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* set the index-1 indexes for the linear index-2 block */ 211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=0, j=0; 212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i<UTRIE2_OMITTED_BMP_INDEX_1_LENGTH; 213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++i, j+=UTRIE2_INDEX_2_BLOCK_LENGTH 214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ) { 215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->index1[i]=j; 216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* set the remaining index-1 indexes to the null index-2 block */ 219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) { 220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->index1[i]=UNEWTRIE2_INDEX_2_NULL_OFFSET; 221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Preallocate and reset data for U+0080..U+07ff, 225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * for 2-byte UTF-8 which will be compacted in 64-blocks 226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * even if UTRIE2_DATA_BLOCK_LENGTH is smaller. 227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=0x80; i<0x800; i+=UTRIE2_DATA_BLOCK_LENGTH) { 229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie2_set32(trie, i, initialValue, pErrorCode); 230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return trie; 233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static UNewTrie2 * 236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)cloneBuilder(const UNewTrie2 *other) { 237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UNewTrie2 *trie; 238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2)); 240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie==NULL) { 241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->data=(uint32_t *)uprv_malloc(other->dataCapacity*4); 245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie->data==NULL) { 246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(trie); 247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->dataCapacity=other->dataCapacity; 250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* clone data */ 252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(trie->index1, other->index1, sizeof(trie->index1)); 253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(trie->index2, other->index2, other->index2Length*4); 254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->index2NullOffset=other->index2NullOffset; 255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->index2Length=other->index2Length; 256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(trie->data, other->data, other->dataLength*4); 258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->dataNullOffset=other->dataNullOffset; 259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->dataLength=other->dataLength; 260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* reference counters */ 262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(other->isCompacted) { 263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->firstFreeBlock=0; 264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(trie->map, other->map, (other->dataLength>>UTRIE2_SHIFT_2)*4); 266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->firstFreeBlock=other->firstFreeBlock; 267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->initialValue=other->initialValue; 270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->errorValue=other->errorValue; 271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->highStart=other->highStart; 272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->isCompacted=other->isCompacted; 273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return trie; 275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UTrie2 * U_EXPORT2 278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode) { 279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTrie2 *trie; 280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*pErrorCode)) { 282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) { 285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); 290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie==NULL) { 291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(trie, other, sizeof(UTrie2)); 294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(other->memory!=NULL) { 296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->memory=uprv_malloc(other->length); 297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie->memory!=NULL) { 298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->isMemoryOwned=TRUE; 299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(trie->memory, other->memory, other->length); 300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* make the clone's pointers point to its own memory */ 302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->index=(uint16_t *)trie->memory+(other->index-(uint16_t *)other->memory); 303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(other->data16!=NULL) { 304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->data16=(uint16_t *)trie->memory+(other->data16-(uint16_t *)other->memory); 305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(other->data32!=NULL) { 307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->data32=(uint32_t *)trie->memory+(other->data32-(uint32_t *)other->memory); 308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else /* other->newTrie!=NULL */ { 311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->newTrie=cloneBuilder(other->newTrie); 312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie->memory==NULL && trie->newTrie==NULL) { 315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(trie); 316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie=NULL; 317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return trie; 319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)typedef struct NewTrieAndStatus { 322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTrie2 *trie; 323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode errorCode; 324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool exclusiveLimit; /* rather than inclusive range end */ 325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} NewTrieAndStatus; 326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static UBool U_CALLCONV 328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)copyEnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { 329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NewTrieAndStatus *nt=(NewTrieAndStatus *)context; 330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(value!=nt->trie->initialValue) { 331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(nt->exclusiveLimit) { 332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) --end; 333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(start==end) { 335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie2_set32(nt->trie, start, value, &nt->errorCode); 336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie2_setRange32(nt->trie, start, end, value, TRUE, &nt->errorCode); 338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return U_SUCCESS(nt->errorCode); 340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return TRUE; 342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE2_DEBUG 346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_printLengths(const UTrie *trie) { 348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) long indexLength=trie->indexLength; 349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) long dataLength=(long)trie->dataLength; 350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) long totalLength=(long)sizeof(UTrieHeader)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2); 351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printf("**UTrieLengths** index:%6ld data:%6ld serialized:%6ld\n", 352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) indexLength, dataLength, totalLength); 353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie2_printLengths(const UTrie2 *trie, const char *which) { 357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) long indexLength=trie->indexLength; 358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) long dataLength=(long)trie->dataLength; 359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) long totalLength=(long)sizeof(UTrie2Header)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2); 360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printf("**UTrie2Lengths(%s)** index:%6ld data:%6ld serialized:%6ld\n", 361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) which, indexLength, dataLength, totalLength); 362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UTrie2 * U_EXPORT2 366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode) { 367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NewTrieAndStatus context; 368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar lead; 369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*pErrorCode)) { 371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) { 374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(other->newTrie!=NULL && !other->newTrie->isCompacted) { 378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return utrie2_clone(other, pErrorCode); /* clone an unfrozen trie */ 379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Clone the frozen trie by enumerating it and building a new one. */ 382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) context.trie=utrie2_open(other->initialValue, other->errorValue, pErrorCode); 383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*pErrorCode)) { 384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) context.exclusiveLimit=FALSE; 387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) context.errorCode=*pErrorCode; 388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie2_enum(other, NULL, copyEnumRange, &context); 389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=context.errorCode; 390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(lead=0xd800; lead<0xdc00; ++lead) { 391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t value; 392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(other->data32==NULL) { 393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(other, lead); 394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value=UTRIE2_GET32_FROM_U16_SINGLE_LEAD(other, lead); 396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(value!=other->initialValue) { 398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode); 399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*pErrorCode)) { 402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie2_close(context.trie); 403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) context.trie=NULL; 404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return context.trie; 406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Almost the same as utrie2_cloneAsThawed() but copies a UTrie and freezes the clone. */ 409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UTrie2 * U_EXPORT2 410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode) { 411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NewTrieAndStatus context; 412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar lead; 413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*pErrorCode)) { 415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie1==NULL) { 418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) context.trie=utrie2_open(trie1->initialValue, errorValue, pErrorCode); 422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*pErrorCode)) { 423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) context.exclusiveLimit=TRUE; 426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) context.errorCode=*pErrorCode; 427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie_enum(trie1, NULL, copyEnumRange, &context); 428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=context.errorCode; 429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(lead=0xd800; lead<0xdc00; ++lead) { 430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t value; 431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie1->data32==NULL) { 432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value=UTRIE_GET16_FROM_LEAD(trie1, lead); 433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value=UTRIE_GET32_FROM_LEAD(trie1, lead); 435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(value!=trie1->initialValue) { 437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode); 438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_SUCCESS(*pErrorCode)) { 441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie2_freeze(context.trie, 442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie1->data32!=NULL ? UTRIE2_32_VALUE_BITS : UTRIE2_16_VALUE_BITS, 443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pErrorCode); 444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE2_DEBUG 446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_SUCCESS(*pErrorCode)) { 447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie_printLengths(trie1); 448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie2_printLengths(context.trie, "fromUTrie"); 449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*pErrorCode)) { 452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie2_close(context.trie); 453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) context.trie=NULL; 454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return context.trie; 456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static U_INLINE UBool 459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)isInNullBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) { 460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i2, block; 461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_IS_LEAD(c) && forLSCP) { 463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+ 464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (c>>UTRIE2_SHIFT_2); 465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i2=trie->index1[c>>UTRIE2_SHIFT_1]+ 467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK); 468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) block=trie->index2[i2]; 470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return (UBool)(block==trie->dataNullOffset); 471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t 474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)allocIndex2Block(UNewTrie2 *trie) { 475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t newBlock, newTop; 476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newBlock=trie->index2Length; 478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTop=newBlock+UTRIE2_INDEX_2_BLOCK_LENGTH; 479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(newTop>LENGTHOF(trie->index2)) { 480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Should never occur. 482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Either UTRIE2_MAX_BUILD_TIME_INDEX_LENGTH is incorrect, 483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * or the code writes more values than should be possible. 484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->index2Length=newTop; 488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(trie->index2+newBlock, trie->index2+trie->index2NullOffset, UTRIE2_INDEX_2_BLOCK_LENGTH*4); 489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return newBlock; 490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t 493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)getIndex2Block(UNewTrie2 *trie, UChar32 c, UBool forLSCP) { 494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i1, i2; 495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_IS_LEAD(c) && forLSCP) { 497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return UTRIE2_LSCP_INDEX_2_OFFSET; 498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i1=c>>UTRIE2_SHIFT_1; 501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i2=trie->index1[i1]; 502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(i2==trie->index2NullOffset) { 503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i2=allocIndex2Block(trie); 504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(i2<0) { 505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; /* program error */ 506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->index1[i1]=i2; 508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return i2; 510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t 513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)allocDataBlock(UNewTrie2 *trie, int32_t copyBlock) { 514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t newBlock, newTop; 515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie->firstFreeBlock!=0) { 517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* get the first free block */ 518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newBlock=trie->firstFreeBlock; 519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->firstFreeBlock=-trie->map[newBlock>>UTRIE2_SHIFT_2]; 520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* get a new block from the high end */ 522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newBlock=trie->dataLength; 523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTop=newBlock+UTRIE2_DATA_BLOCK_LENGTH; 524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(newTop>trie->dataCapacity) { 525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* out of memory in the data array */ 526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t capacity; 527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t *data; 528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie->dataCapacity<UNEWTRIE2_MEDIUM_DATA_LENGTH) { 530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) capacity=UNEWTRIE2_MEDIUM_DATA_LENGTH; 531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(trie->dataCapacity<UNEWTRIE2_MAX_DATA_LENGTH) { 532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) capacity=UNEWTRIE2_MAX_DATA_LENGTH; 533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Should never occur. 536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Either UNEWTRIE2_MAX_DATA_LENGTH is incorrect, 537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * or the code writes more values than should be possible. 538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) data=(uint32_t *)uprv_malloc(capacity*4); 542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(data==NULL) { 543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(data, trie->data, trie->dataLength*4); 546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(trie->data); 547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->data=data; 548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->dataCapacity=capacity; 549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->dataLength=newTop; 551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(trie->data+newBlock, trie->data+copyBlock, UTRIE2_DATA_BLOCK_LENGTH*4); 553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->map[newBlock>>UTRIE2_SHIFT_2]=0; 554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return newBlock; 555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* call when the block's reference counter reaches 0 */ 558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)releaseDataBlock(UNewTrie2 *trie, int32_t block) { 560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* put this block at the front of the free-block chain */ 561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->map[block>>UTRIE2_SHIFT_2]=-trie->firstFreeBlock; 562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->firstFreeBlock=block; 563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static U_INLINE UBool 566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)isWritableBlock(UNewTrie2 *trie, int32_t block) { 567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return (UBool)(block!=trie->dataNullOffset && 1==trie->map[block>>UTRIE2_SHIFT_2]); 568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static U_INLINE void 571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)setIndex2Entry(UNewTrie2 *trie, int32_t i2, int32_t block) { 572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t oldBlock; 573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++trie->map[block>>UTRIE2_SHIFT_2]; /* increment first, in case block==oldBlock! */ 574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) oldBlock=trie->index2[i2]; 575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(0 == --trie->map[oldBlock>>UTRIE2_SHIFT_2]) { 576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) releaseDataBlock(trie, oldBlock); 577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->index2[i2]=block; 579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * No error checking for illegal arguments. 583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return -1 if no new data block available (out of memory in data array) 585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @internal 586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t 588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)getDataBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) { 589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i2, oldBlock, newBlock; 590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i2=getIndex2Block(trie, c, forLSCP); 592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(i2<0) { 593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; /* program error */ 594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i2+=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; 597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) oldBlock=trie->index2[i2]; 598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(isWritableBlock(trie, oldBlock)) { 599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return oldBlock; 600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* allocate a new data block */ 603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newBlock=allocDataBlock(trie, oldBlock); 604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(newBlock<0) { 605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* out of memory in the data array */ 606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) setIndex2Entry(trie, i2, newBlock); 609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return newBlock; 610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return TRUE if the value was successfully set 614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)set32(UNewTrie2 *trie, 617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c, UBool forLSCP, uint32_t value, 618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t block; 620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie==NULL || trie->isCompacted) { 622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_NO_WRITE_PERMISSION; 623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) block=getDataBlock(trie, c, forLSCP); 627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(block<0) { 628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->data[block+(c&UTRIE2_DATA_MASK)]=value; 633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2 636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) { 637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*pErrorCode)) { 638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if((uint32_t)c>0x10ffff) { 641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) set32(trie->newTrie, c, TRUE, value, pErrorCode); 645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2 648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie, 649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c, uint32_t value, 650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*pErrorCode)) { 652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!U_IS_LEAD(c)) { 655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) set32(trie->newTrie, c, FALSE, value, pErrorCode); 659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)writeBlock(uint32_t *block, uint32_t value) { 663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t *limit=block+UTRIE2_DATA_BLOCK_LENGTH; 664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(block<limit) { 665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *block++=value; 666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * initialValue is ignored if overwrite=TRUE 671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @internal 672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)fillBlock(uint32_t *block, UChar32 start, UChar32 limit, 675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t value, uint32_t initialValue, UBool overwrite) { 676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t *pLimit; 677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pLimit=block+limit; 679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) block+=start; 680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(overwrite) { 681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(block<pLimit) { 682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *block++=value; 683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(block<pLimit) { 686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(*block==initialValue) { 687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *block=value; 688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++block; 690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2 695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie2_setRange32(UTrie2 *trie, 696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 start, UChar32 end, 697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t value, UBool overwrite, 698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * repeat value in [start..end] 701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * mark index values for repeat-data blocks by setting bit 31 of the index values 702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * fill around existing values if any, if(overwrite) 703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UNewTrie2 *newTrie; 705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t block, rest, repeatBlock; 706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 limit; 707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*pErrorCode)) { 709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if((uint32_t)start>0x10ffff || (uint32_t)end>0x10ffff || start>end) { 712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie=trie->newTrie; 716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(newTrie==NULL || newTrie->isCompacted) { 717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_NO_WRITE_PERMISSION; 718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!overwrite && value==newTrie->initialValue) { 721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; /* nothing to do */ 722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) limit=end+1; 725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(start&UTRIE2_DATA_MASK) { 726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 nextStart; 727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* set partial block at [start..following block boundary[ */ 729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) block=getDataBlock(newTrie, start, TRUE); 730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(block<0) { 731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nextStart=(start+UTRIE2_DATA_BLOCK_LENGTH)&~UTRIE2_DATA_MASK; 736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(nextStart<=limit) { 737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, UTRIE2_DATA_BLOCK_LENGTH, 738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value, newTrie->initialValue, overwrite); 739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start=nextStart; 740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, limit&UTRIE2_DATA_MASK, 742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value, newTrie->initialValue, overwrite); 743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* number of positions in the last, partial block */ 748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) rest=limit&UTRIE2_DATA_MASK; 749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* round down limit to a block boundary */ 751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) limit&=~UTRIE2_DATA_MASK; 752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* iterate over all-value blocks */ 754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(value==newTrie->initialValue) { 755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) repeatBlock=newTrie->dataNullOffset; 756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) repeatBlock=-1; 758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(start<limit) { 761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i2; 762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool setRepeatBlock=FALSE; 763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(value==newTrie->initialValue && isInNullBlock(newTrie, start, TRUE)) { 765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start+=UTRIE2_DATA_BLOCK_LENGTH; /* nothing to do */ 766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* get index value */ 770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i2=getIndex2Block(newTrie, start, TRUE); 771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(i2<0) { 772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_INTERNAL_PROGRAM_ERROR; 773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i2+=(start>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; 776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) block=newTrie->index2[i2]; 777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(isWritableBlock(newTrie, block)) { 778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* already allocated */ 779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(overwrite && block>=UNEWTRIE2_DATA_0800_OFFSET) { 780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * We overwrite all values, and it's not a 782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * protected (ASCII-linear or 2-byte UTF-8) block: 783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * replace with the repeatBlock. 784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) setRepeatBlock=TRUE; 786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* !overwrite, or protected block: just write the values into this block */ 788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fillBlock(newTrie->data+block, 789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 0, UTRIE2_DATA_BLOCK_LENGTH, 790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value, newTrie->initialValue, overwrite); 791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(newTrie->data[block]!=value && (overwrite || block==newTrie->dataNullOffset)) { 793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Set the repeatBlock instead of the null block or previous repeat block: 795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * If !isWritableBlock() then all entries in the block have the same value 797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * because it's the null block or a range block (the repeatBlock from a previous 798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * call to utrie2_setRange32()). 799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * No other blocks are used multiple times before compacting. 800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The null block is the only non-writable block with the initialValue because 802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * of the repeatBlock initialization above. (If value==initialValue, then 803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the repeatBlock will be the null data block.) 804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * We set our repeatBlock if the desired value differs from the block's value, 806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and if we overwrite any data or if the data is all initial values 807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * (which is the same as the block being the null block, see above). 808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) setRepeatBlock=TRUE; 810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(setRepeatBlock) { 812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(repeatBlock>=0) { 813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) setIndex2Entry(newTrie, i2, repeatBlock); 814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* create and set and fill the repeatBlock */ 816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) repeatBlock=getDataBlock(newTrie, start, TRUE); 817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(repeatBlock<0) { 818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) writeBlock(newTrie->data+repeatBlock, value); 822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start+=UTRIE2_DATA_BLOCK_LENGTH; 826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(rest>0) { 829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* set partial block at [last block boundary..limit[ */ 830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) block=getDataBlock(newTrie, start, TRUE); 831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(block<0) { 832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fillBlock(newTrie->data+block, 0, rest, value, newTrie->initialValue, overwrite); 837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 840f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 841f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 842f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* compaction --------------------------------------------------------------- */ 843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static U_INLINE UBool 845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)equal_int32(const int32_t *s, const int32_t *t, int32_t length) { 846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(length>0 && *s==*t) { 847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++s; 848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++t; 849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) --length; 850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return (UBool)(length==0); 852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static U_INLINE UBool 855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) { 856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(length>0 && *s==*t) { 857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++s; 858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++t; 859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) --length; 860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return (UBool)(length==0); 862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t 865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)findSameIndex2Block(const int32_t *idx, int32_t index2Length, int32_t otherBlock) { 866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t block; 867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* ensure that we do not even partially get past index2Length */ 869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) index2Length-=UTRIE2_INDEX_2_BLOCK_LENGTH; 870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 871f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(block=0; block<=index2Length; ++block) { 872f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(equal_int32(idx+block, idx+otherBlock, UTRIE2_INDEX_2_BLOCK_LENGTH)) { 873f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return block; 874f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 875f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 876f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 877f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 878f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 879f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t 880f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)findSameDataBlock(const uint32_t *data, int32_t dataLength, int32_t otherBlock, int32_t blockLength) { 881f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t block; 882f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 883f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* ensure that we do not even partially get past dataLength */ 884f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dataLength-=blockLength; 885f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 886f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(block=0; block<=dataLength; block+=UTRIE2_DATA_GRANULARITY) { 887f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(equal_uint32(data+block, data+otherBlock, blockLength)) { 888f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return block; 889f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 890f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 891f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 892f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 893f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 894f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 895f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Find the start of the last range in the trie by enumerating backward. 896f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Indexes for supplementary code points higher than this will be omitted. 897f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 898f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static UChar32 899f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)findHighStart(UNewTrie2 *trie, uint32_t highValue) { 900f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint32_t *data32; 901f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 902f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t value, initialValue; 903f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c, prev; 904f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i1, i2, j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock; 905f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 906f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) data32=trie->data; 907f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) initialValue=trie->initialValue; 908f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 909f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) index2NullOffset=trie->index2NullOffset; 910f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nullBlock=trie->dataNullOffset; 911f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 912f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* set variables for previous range */ 913f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(highValue==initialValue) { 914f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevI2Block=index2NullOffset; 915f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevBlock=nullBlock; 916f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 917f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevI2Block=-1; 918f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevBlock=-1; 919f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 920f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prev=0x110000; 921f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 922f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* enumerate index-2 blocks */ 923f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i1=UNEWTRIE2_INDEX_1_LENGTH; 924f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c=prev; 925f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(c>0) { 926f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i2Block=trie->index1[--i1]; 927f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(i2Block==prevI2Block) { 928f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* the index-2 block is the same as the previous one, and filled with highValue */ 929f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c-=UTRIE2_CP_PER_INDEX_1_ENTRY; 930f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 931f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 932f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevI2Block=i2Block; 933f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(i2Block==index2NullOffset) { 934f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* this is the null index-2 block */ 935f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(highValue!=initialValue) { 936f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return c; 937f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 938f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c-=UTRIE2_CP_PER_INDEX_1_ENTRY; 939f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 940f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* enumerate data blocks for one index-2 block */ 941f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i2=UTRIE2_INDEX_2_BLOCK_LENGTH; i2>0;) { 942f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) block=trie->index2[i2Block+ --i2]; 943f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(block==prevBlock) { 944f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* the block is the same as the previous one, and filled with highValue */ 945f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c-=UTRIE2_DATA_BLOCK_LENGTH; 946f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 947f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 948f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevBlock=block; 949f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(block==nullBlock) { 950f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* this is the null data block */ 951f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(highValue!=initialValue) { 952f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return c; 953f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 954f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c-=UTRIE2_DATA_BLOCK_LENGTH; 955f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 956f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(j=UTRIE2_DATA_BLOCK_LENGTH; j>0;) { 957f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value=data32[block+ --j]; 958f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(value!=highValue) { 959f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return c; 960f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 961f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) --c; 962f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 963f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 964f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 965f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 966f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 967f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 968f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* deliver last range */ 969f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 970f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 971f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 972f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 973f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Compact a build-time trie. 974f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 975f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The compaction 976f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - removes blocks that are identical with earlier ones 977f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - overlaps adjacent blocks as much as possible (if overlap==TRUE) 978f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - moves blocks in steps of the data granularity 979f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - moves and overlaps blocks that overlap with multiple values in the overlap region 980f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 981f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * It does not 982f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - try to move and overlap blocks that are not already adjacent 983f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 984f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 985f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)compactData(UNewTrie2 *trie) { 986f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t start, newStart, movedStart; 987f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t blockLength, overlap; 988f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i, mapIndex, blockCount; 989f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 990f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* do not compact linear-ASCII data */ 991f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newStart=UTRIE2_DATA_START_OFFSET; 992f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(start=0, i=0; start<newStart; start+=UTRIE2_DATA_BLOCK_LENGTH, ++i) { 993f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->map[i]=start; 994f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 995f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 996f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 997f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Start with a block length of 64 for 2-byte UTF-8, 998f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * then switch to UTRIE2_DATA_BLOCK_LENGTH. 999f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 1000f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) blockLength=64; 1001f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) blockCount=blockLength>>UTRIE2_SHIFT_2; 1002f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(start=newStart; start<trie->dataLength;) { 1003f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 1004f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * start: index of first entry of current block 1005f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * newStart: index where the current block is to be moved 1006f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * (right after current end of already-compacted data) 1007f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 1008f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(start==UNEWTRIE2_DATA_0800_OFFSET) { 1009f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) blockLength=UTRIE2_DATA_BLOCK_LENGTH; 1010f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) blockCount=1; 1011f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1012f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1013f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* skip blocks that are not used */ 1014f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie->map[start>>UTRIE2_SHIFT_2]<=0) { 1015f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* advance start to the next block */ 1016f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start+=blockLength; 1017f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1018f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* leave newStart with the previous block! */ 1019f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 1020f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1021f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1022f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* search for an identical block */ 1023f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if( (movedStart=findSameDataBlock(trie->data, newStart, start, blockLength)) 1024f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) >=0 1025f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ) { 1026f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* found an identical block, set the other block's index value for the current block */ 1027f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { 1028f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->map[mapIndex++]=movedStart; 1029f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) movedStart+=UTRIE2_DATA_BLOCK_LENGTH; 1030f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1031f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1032f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* advance start to the next block */ 1033f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start+=blockLength; 1034f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1035f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* leave newStart with the previous block! */ 1036f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 1037f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1038f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1039f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* see if the beginning of this block can be overlapped with the end of the previous block */ 1040f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* look for maximum overlap (modulo granularity) with the previous, adjacent block */ 1041f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(overlap=blockLength-UTRIE2_DATA_GRANULARITY; 1042f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) overlap>0 && !equal_uint32(trie->data+(newStart-overlap), trie->data+start, overlap); 1043f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) overlap-=UTRIE2_DATA_GRANULARITY) {} 1044f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1045f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(overlap>0 || newStart<start) { 1046f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* some overlap, or just move the whole block */ 1047f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) movedStart=newStart-overlap; 1048f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { 1049f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->map[mapIndex++]=movedStart; 1050f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) movedStart+=UTRIE2_DATA_BLOCK_LENGTH; 1051f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1052f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1053f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* move the non-overlapping indexes to their new positions */ 1054f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start+=overlap; 1055f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=blockLength-overlap; i>0; --i) { 1056f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->data[newStart++]=trie->data[start++]; 1057f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1058f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else /* no overlap && newStart==start */ { 1059f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { 1060f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->map[mapIndex++]=start; 1061f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start+=UTRIE2_DATA_BLOCK_LENGTH; 1062f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1063f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newStart=start; 1064f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1065f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1066f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1067f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* now adjust the index-2 table */ 1068f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=0; i<trie->index2Length; ++i) { 1069f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(i==UNEWTRIE2_INDEX_GAP_OFFSET) { 1070f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Gap indexes are invalid (-1). Skip over the gap. */ 1071f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i+=UNEWTRIE2_INDEX_GAP_LENGTH; 1072f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1073f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->index2[i]=trie->map[trie->index2[i]>>UTRIE2_SHIFT_2]; 1074f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1075f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->dataNullOffset=trie->map[trie->dataNullOffset>>UTRIE2_SHIFT_2]; 1076f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1077f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* ensure dataLength alignment */ 1078f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while((newStart&(UTRIE2_DATA_GRANULARITY-1))!=0) { 1079f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->data[newStart++]=trie->initialValue; 1080f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1081f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1082f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE2_DEBUG 1083f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* we saved some space */ 1084f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printf("compacting UTrie2: count of 32-bit data words %lu->%lu\n", 1085f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (long)trie->dataLength, (long)newStart); 1086f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 1087f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1088f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->dataLength=newStart; 1089f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1090f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1091f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 1092f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)compactIndex2(UNewTrie2 *trie) { 1093f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i, start, newStart, movedStart, overlap; 1094f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1095f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* do not compact linear-BMP index-2 blocks */ 1096f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newStart=UTRIE2_INDEX_2_BMP_LENGTH; 1097f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(start=0, i=0; start<newStart; start+=UTRIE2_INDEX_2_BLOCK_LENGTH, ++i) { 1098f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->map[i]=start; 1099f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Reduce the index table gap to what will be needed at runtime. */ 1102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newStart+=UTRIE2_UTF8_2B_INDEX_2_LENGTH+((trie->highStart-0x10000)>>UTRIE2_SHIFT_1); 1103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(start=UNEWTRIE2_INDEX_2_NULL_OFFSET; start<trie->index2Length;) { 1105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 1106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * start: index of first entry of current block 1107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * newStart: index where the current block is to be moved 1108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * (right after current end of already-compacted data) 1109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 1110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* search for an identical block */ 1112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if( (movedStart=findSameIndex2Block(trie->index2, newStart, start)) 1113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) >=0 1114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ) { 1115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* found an identical block, set the other block's index value for the current block */ 1116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->map[start>>UTRIE2_SHIFT_1_2]=movedStart; 1117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* advance start to the next block */ 1119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start+=UTRIE2_INDEX_2_BLOCK_LENGTH; 1120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* leave newStart with the previous block! */ 1122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 1123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* see if the beginning of this block can be overlapped with the end of the previous block */ 1126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* look for maximum overlap with the previous, adjacent block */ 1127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(overlap=UTRIE2_INDEX_2_BLOCK_LENGTH-1; 1128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) overlap>0 && !equal_int32(trie->index2+(newStart-overlap), trie->index2+start, overlap); 1129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) --overlap) {} 1130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(overlap>0 || newStart<start) { 1132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* some overlap, or just move the whole block */ 1133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->map[start>>UTRIE2_SHIFT_1_2]=newStart-overlap; 1134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* move the non-overlapping indexes to their new positions */ 1136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start+=overlap; 1137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=UTRIE2_INDEX_2_BLOCK_LENGTH-overlap; i>0; --i) { 1138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->index2[newStart++]=trie->index2[start++]; 1139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else /* no overlap && newStart==start */ { 1141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->map[start>>UTRIE2_SHIFT_1_2]=start; 1142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start+=UTRIE2_INDEX_2_BLOCK_LENGTH; 1143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newStart=start; 1144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* now adjust the index-1 table */ 1148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=0; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) { 1149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->index1[i]=trie->map[trie->index1[i]>>UTRIE2_SHIFT_1_2]; 1150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->index2NullOffset=trie->map[trie->index2NullOffset>>UTRIE2_SHIFT_1_2]; 1152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 1154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Ensure data table alignment: 1155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Needs to be granularity-aligned for 16-bit trie 1156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * (so that dataMove will be down-shiftable), 1157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and 2-aligned for uint32_t data. 1158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 1159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while((newStart&((UTRIE2_DATA_GRANULARITY-1)|1))!=0) { 1160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Arbitrary value: 0x3fffc not possible for real data. */ 1161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->index2[newStart++]=(int32_t)0xffff<<UTRIE2_INDEX_SHIFT; 1162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE2_DEBUG 1165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* we saved some space */ 1166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printf("compacting UTrie2: count of 16-bit index-2 words %lu->%lu\n", 1167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (long)trie->index2Length, (long)newStart); 1168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 1169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->index2Length=newStart; 1171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 1174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)compactTrie(UTrie2 *trie, UErrorCode *pErrorCode) { 1175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UNewTrie2 *newTrie; 1176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 highStart, suppHighStart; 1177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t highValue; 1178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie=trie->newTrie; 1180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* find highStart and round it up */ 1182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) highValue=utrie2_get32(trie, 0x10ffff); 1183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) highStart=findHighStart(newTrie, highValue); 1184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) highStart=(highStart+(UTRIE2_CP_PER_INDEX_1_ENTRY-1))&~(UTRIE2_CP_PER_INDEX_1_ENTRY-1); 1185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(highStart==0x110000) { 1186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) highValue=trie->errorValue; 1187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 1190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Set trie->highStart only after utrie2_get32(trie, highStart). 1191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Otherwise utrie2_get32(trie, highStart) would try to read the highValue. 1192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 1193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->highStart=newTrie->highStart=highStart; 1194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE2_DEBUG 1196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printf("UTrie2: highStart U+%04lx highValue 0x%lx initialValue 0x%lx\n", 1197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (long)highStart, (long)highValue, (long)trie->initialValue); 1198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 1199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(highStart<0x110000) { 1201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Blank out [highStart..10ffff] to release associated data blocks. */ 1202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) suppHighStart= highStart<=0x10000 ? 0x10000 : highStart; 1203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie2_setRange32(trie, suppHighStart, 0x10ffff, trie->initialValue, TRUE, pErrorCode); 1204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*pErrorCode)) { 1205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) compactData(newTrie); 1210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(highStart>0x10000) { 1211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) compactIndex2(newTrie); 1212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE2_DEBUG 1213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printf("UTrie2: highStart U+%04lx count of 16-bit index-2 words %lu->%lu\n", 1215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (long)highStart, (long)trie->newTrie->index2Length, (long)UTRIE2_INDEX_1_OFFSET); 1216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 1217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 1220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Store the highValue in the data array and round up the dataLength. 1221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Must be done after compactData() because that assumes that dataLength 1222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * is a multiple of UTRIE2_DATA_BLOCK_LENGTH. 1223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 1224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->data[newTrie->dataLength++]=highValue; 1225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while((newTrie->dataLength&(UTRIE2_DATA_GRANULARITY-1))!=0) { 1226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->data[newTrie->dataLength++]=trie->initialValue; 1227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie->isCompacted=TRUE; 1230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* serialization ------------------------------------------------------------ */ 1233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 1235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Maximum length of the runtime index array. 1236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Limited by its own 16-bit index values, and by uint16_t UTrie2Header.indexLength. 1237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * (The actual maximum length is lower, 1238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * (0x110000>>UTRIE2_SHIFT_2)+UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH.) 1239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 1240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UTRIE2_MAX_INDEX_LENGTH 0xffff 1241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 1243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Maximum length of the runtime data array. 1244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Limited by 16-bit index values that are left-shifted by UTRIE2_INDEX_SHIFT, 1245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and by uint16_t UTrie2Header.shiftedDataLength. 1246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 1247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UTRIE2_MAX_DATA_LENGTH (0xffff<<UTRIE2_INDEX_SHIFT) 1248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Compact and internally serialize the trie. */ 1250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2 1251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode) { 1252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UNewTrie2 *newTrie; 1253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTrie2Header *header; 1254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t *p; 1255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint16_t *dest16; 1256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i, length; 1257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t allIndexesLength; 1258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t dataMove; /* >0 if the data is moved to the end of the index array */ 1259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 highStart; 1260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* argument check */ 1262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*pErrorCode)) { 1263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if( trie==NULL || 1266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits 1267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ) { 1268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTrie=trie->newTrie; 1272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(newTrie==NULL) { 1273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* already frozen */ 1274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTrie2ValueBits frozenValueBits= 1275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->data16!=NULL ? UTRIE2_16_VALUE_BITS : UTRIE2_32_VALUE_BITS; 1276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(valueBits!=frozenValueBits) { 1277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* compact if necessary */ 1283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!newTrie->isCompacted) { 1284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) compactTrie(trie, pErrorCode); 1285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*pErrorCode)) { 1286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) highStart=trie->highStart; 1290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(highStart<=0x10000) { 1292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) allIndexesLength=UTRIE2_INDEX_1_OFFSET; 1293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) allIndexesLength=newTrie->index2Length; 1295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(valueBits==UTRIE2_16_VALUE_BITS) { 1297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dataMove=allIndexesLength; 1298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dataMove=0; 1300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* are indexLength and dataLength within limits? */ 1303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if( /* for unshifted indexLength */ 1304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) allIndexesLength>UTRIE2_MAX_INDEX_LENGTH || 1305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* for unshifted dataNullOffset */ 1306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (dataMove+newTrie->dataNullOffset)>0xffff || 1307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* for unshifted 2-byte UTF-8 index-2 values */ 1308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (dataMove+UNEWTRIE2_DATA_0800_OFFSET)>0xffff || 1309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* for shiftedDataLength */ 1310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (dataMove+newTrie->dataLength)>UTRIE2_MAX_DATA_LENGTH 1311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ) { 1312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* calculate the total serialized length */ 1317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) length=sizeof(UTrie2Header)+allIndexesLength*2; 1318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(valueBits==UTRIE2_16_VALUE_BITS) { 1319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) length+=newTrie->dataLength*2; 1320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) length+=newTrie->dataLength*4; 1322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->memory=uprv_malloc(length); 1325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie->memory==NULL) { 1326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->length=length; 1330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->isMemoryOwned=TRUE; 1331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->indexLength=allIndexesLength; 1333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->dataLength=newTrie->dataLength; 1334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(highStart<=0x10000) { 1335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->index2NullOffset=0xffff; 1336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET+newTrie->index2NullOffset; 1338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->dataNullOffset=(uint16_t)(dataMove+newTrie->dataNullOffset); 1340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->highValueIndex=dataMove+trie->dataLength-UTRIE2_DATA_GRANULARITY; 1341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* set the header fields */ 1343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) header=(UTrie2Header *)trie->memory; 1344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) header->signature=UTRIE2_SIG; /* "Tri2" */ 1346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) header->options=(uint16_t)valueBits; 1347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) header->indexLength=(uint16_t)trie->indexLength; 1349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) header->shiftedDataLength=(uint16_t)(trie->dataLength>>UTRIE2_INDEX_SHIFT); 1350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) header->index2NullOffset=trie->index2NullOffset; 1351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) header->dataNullOffset=trie->dataNullOffset; 1352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) header->shiftedHighStart=(uint16_t)(highStart>>UTRIE2_SHIFT_1); 1353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* fill the index and data arrays */ 1355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dest16=(uint16_t *)(header+1); 1356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->index=dest16; 1357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove */ 1359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p=(uint32_t *)newTrie->index2; 1360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=UTRIE2_INDEX_2_BMP_LENGTH; i>0; --i) { 1361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT); 1362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write UTF-8 2-byte index-2 values, not right-shifted */ 1365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */ 1366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET); 1367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(; i<(0xe0-0xc0); ++i) { /* C2..DF */ 1369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *dest16++=(uint16_t)(dataMove+newTrie->index2[i<<(6-UTRIE2_SHIFT_2)]); 1370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(highStart>0x10000) { 1373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t index1Length=(highStart-0x10000)>>UTRIE2_SHIFT_1; 1374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t index2Offset=UTRIE2_INDEX_2_BMP_LENGTH+UTRIE2_UTF8_2B_INDEX_2_LENGTH+index1Length; 1375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write 16-bit index-1 values for supplementary code points */ 1377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p=(uint32_t *)newTrie->index1+UTRIE2_OMITTED_BMP_INDEX_1_LENGTH; 1378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=index1Length; i>0; --i) { 1379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *dest16++=(uint16_t)(UTRIE2_INDEX_2_OFFSET + *p++); 1380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 1383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * write the index-2 array values for supplementary code points, 1384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove 1385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 1386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p=(uint32_t *)newTrie->index2+index2Offset; 1387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=newTrie->index2Length-index2Offset; i>0; --i) { 1388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT); 1389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write the 16/32-bit data array */ 1393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) switch(valueBits) { 1394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case UTRIE2_16_VALUE_BITS: 1395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write 16-bit data values */ 1396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->data16=dest16; 1397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->data32=NULL; 1398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p=newTrie->data; 1399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=newTrie->dataLength; i>0; --i) { 1400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *dest16++=(uint16_t)*p++; 1401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case UTRIE2_32_VALUE_BITS: 1404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write 32-bit data values */ 1405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->data16=NULL; 1406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->data32=(uint32_t *)dest16; 1407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(dest16, newTrie->data, newTrie->dataLength*4); 1408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) default: 1410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Delete the UNewTrie2. */ 1415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(newTrie->data); 1416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(newTrie); 1417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->newTrie=NULL; 1418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UBool U_EXPORT2 1421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie2_isFrozen(const UTrie2 *trie) { 1422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return (UBool)(trie->newTrie==NULL); 1423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2 1426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie2_serialize(UTrie2 *trie, 1427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) void *data, int32_t capacity, 1428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 1429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* argument check */ 1430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*pErrorCode)) { 1431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 1432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if( trie==NULL || trie->memory==NULL || trie->newTrie!=NULL || 1435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) capacity<0 || (capacity>0 && (data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0))) 1436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ) { 1437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 1439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(capacity>=trie->length) { 1442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(data, trie->memory, trie->length); 1443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 1445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return trie->length; 1447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 1450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * This is here to avoid a dependency from utrie2.cpp on utrie.c. 1451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * This file already depends on utrie.c. 1452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Otherwise, this should be in utrie2.cpp right after utrie2_swap(). 1453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 1454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2 1455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie2_swapAnyVersion(const UDataSwapper *ds, 1456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const void *inData, int32_t length, void *outData, 1457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 1458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_SUCCESS(*pErrorCode)) { 1459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) switch(utrie2_getVersion(inData, length, TRUE)) { 1460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 1: 1461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return utrie_swap(ds, inData, length, outData, pErrorCode); 1462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 2: 1463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return utrie2_swap(ds, inData, length, outData, pErrorCode); 1464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) default: 1465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_INVALID_FORMAT_ERROR; 1466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 1467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 1470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1471