1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)****************************************************************************** 3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Copyright (C) 2001-2009, International Business Machines 5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Corporation and others. All Rights Reserved. 6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)****************************************************************************** 8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* file name: utrie.c 9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* encoding: US-ASCII 10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* tab size: 8 (not used) 11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* indentation:4 12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* created on: 2001oct20 14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* created by: Markus W. Scherer 15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* This is a common implementation of a "folded" trie. 17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* It is a kind of compressed, serializable table of 16- or 32-bit values associated with 18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Unicode code points (0..0x10ffff). 19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE_DEBUG 22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)# include <stdio.h> 23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h" 26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cmemory.h" 27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "utrie.h" 28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* miscellaneous ------------------------------------------------------------ */ 30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#undef ABS 32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define ABS(x) ((x)>=0 ? (x) : -(x)) 33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static U_INLINE UBool 35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) { 36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(length>0 && *s==*t) { 37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++s; 38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++t; 39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) --length; 40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return (UBool)(length==0); 42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* Building a trie ----------------------------------------------------------*/ 45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UNewTrie * U_EXPORT2 47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_open(UNewTrie *fillIn, 48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t *aliasData, int32_t maxDataLength, 49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t initialValue, uint32_t leadUnitValue, 50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool latin1Linear) { 51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UNewTrie *trie; 52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i, j; 53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if( maxDataLength<UTRIE_DATA_BLOCK_LENGTH || 55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (latin1Linear && maxDataLength<1024) 56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ) { 57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(fillIn!=NULL) { 61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie=fillIn; 62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie=(UNewTrie *)uprv_malloc(sizeof(UNewTrie)); 64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie==NULL) { 65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memset(trie, 0, sizeof(UNewTrie)); 69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->isAllocated= (UBool)(fillIn==NULL); 70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(aliasData!=NULL) { 72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->data=aliasData; 73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->isDataAllocated=FALSE; 74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->data=(uint32_t *)uprv_malloc(maxDataLength*4); 76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie->data==NULL) { 77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(trie); 78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->isDataAllocated=TRUE; 81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* preallocate and reset the first data block (block index 0) */ 84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) j=UTRIE_DATA_BLOCK_LENGTH; 85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(latin1Linear) { 87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* preallocate and reset the first block (number 0) and Latin-1 (U+0000..U+00ff) after that */ 88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* made sure above that maxDataLength>=1024 */ 89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* set indexes to point to consecutive data blocks */ 91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i=0; 92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) do { 93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* do this at least for trie->index[0] even if that block is only partly used for Latin-1 */ 94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->index[i++]=j; 95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) j+=UTRIE_DATA_BLOCK_LENGTH; 96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } while(i<(256>>UTRIE_SHIFT)); 97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* reset the initially allocated blocks to the initial value */ 100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->dataLength=j; 101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(j>0) { 102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->data[--j]=initialValue; 103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->leadUnitValue=leadUnitValue; 106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->indexLength=UTRIE_MAX_INDEX_LENGTH; 107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->dataCapacity=maxDataLength; 108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->isLatin1Linear=latin1Linear; 109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->isCompacted=FALSE; 110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return trie; 111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UNewTrie * U_EXPORT2 114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_clone(UNewTrie *fillIn, const UNewTrie *other, uint32_t *aliasData, int32_t aliasDataCapacity) { 115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UNewTrie *trie; 116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool isDataAllocated; 117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* do not clone if other is not valid or already compacted */ 119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(other==NULL || other->data==NULL || other->isCompacted) { 120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* clone data */ 124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(aliasData!=NULL && aliasDataCapacity>=other->dataCapacity) { 125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) isDataAllocated=FALSE; 126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) aliasDataCapacity=other->dataCapacity; 128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) aliasData=(uint32_t *)uprv_malloc(other->dataCapacity*4); 129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(aliasData==NULL) { 130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) isDataAllocated=TRUE; 133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie=utrie_open(fillIn, aliasData, aliasDataCapacity, 136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) other->data[0], other->leadUnitValue, 137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) other->isLatin1Linear); 138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie==NULL) { 139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(aliasData); 140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(trie->index, other->index, sizeof(trie->index)); 142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(trie->data, other->data, other->dataLength*4); 143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->dataLength=other->dataLength; 144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->isDataAllocated=isDataAllocated; 145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return trie; 148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2 151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_close(UNewTrie *trie) { 152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie!=NULL) { 153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie->isDataAllocated) { 154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(trie->data); 155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->data=NULL; 156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie->isAllocated) { 158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(trie); 159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI uint32_t * U_EXPORT2 164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_getData(UNewTrie *trie, int32_t *pLength) { 165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie==NULL || pLength==NULL) { 166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pLength=trie->dataLength; 170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return trie->data; 171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t 174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_allocDataBlock(UNewTrie *trie) { 175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t newBlock, newTop; 176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newBlock=trie->dataLength; 178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newTop=newBlock+UTRIE_DATA_BLOCK_LENGTH; 179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(newTop>trie->dataCapacity) { 180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* out of memory in the data array */ 181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->dataLength=newTop; 184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return newBlock; 185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * No error checking for illegal arguments. 189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return -1 if no new data block available (out of memory in data array) 191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @internal 192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t 194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_getDataBlock(UNewTrie *trie, UChar32 c) { 195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t indexValue, newBlock; 196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c>>=UTRIE_SHIFT; 198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) indexValue=trie->index[c]; 199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(indexValue>0) { 200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return indexValue; 201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* allocate a new data block */ 204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newBlock=utrie_allocDataBlock(trie); 205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(newBlock<0) { 206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* out of memory in the data array */ 207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->index[c]=newBlock; 210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* copy-on-write for a block from a setRange() */ 212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(trie->data+newBlock, trie->data-indexValue, 4*UTRIE_DATA_BLOCK_LENGTH); 213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return newBlock; 214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return TRUE if the value was successfully set 218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UBool U_EXPORT2 220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_set32(UNewTrie *trie, UChar32 c, uint32_t value) { 221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t block; 222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* valid, uncompacted trie and valid c? */ 224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie==NULL || trie->isCompacted || (uint32_t)c>0x10ffff) { 225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FALSE; 226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) block=utrie_getDataBlock(trie, c); 229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(block<0) { 230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FALSE; 231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->data[block+(c&UTRIE_MASK)]=value; 234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return TRUE; 235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI uint32_t U_EXPORT2 238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_get32(UNewTrie *trie, UChar32 c, UBool *pInBlockZero) { 239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t block; 240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* valid, uncompacted trie and valid c? */ 242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie==NULL || trie->isCompacted || (uint32_t)c>0x10ffff) { 243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(pInBlockZero!=NULL) { 244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pInBlockZero=TRUE; 245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) block=trie->index[c>>UTRIE_SHIFT]; 250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(pInBlockZero!=NULL) { 251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pInBlockZero= (UBool)(block==0); 252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return trie->data[ABS(block)+(c&UTRIE_MASK)]; 255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @internal 259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_fillBlock(uint32_t *block, UChar32 start, UChar32 limit, 262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t value, uint32_t initialValue, UBool overwrite) { 263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t *pLimit; 264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pLimit=block+limit; 266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) block+=start; 267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(overwrite) { 268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(block<pLimit) { 269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *block++=value; 270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(block<pLimit) { 273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(*block==initialValue) { 274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *block=value; 275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++block; 277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UBool U_EXPORT2 282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, UBool overwrite) { 283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * repeat value in [start..limit[ 285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * mark index values for repeat-data blocks by setting bit 31 of the index values 286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * fill around existing values if any, if(overwrite) 287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t initialValue; 289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t block, rest, repeatBlock; 290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* valid, uncompacted trie and valid indexes? */ 292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if( trie==NULL || trie->isCompacted || 293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (uint32_t)start>0x10ffff || (uint32_t)limit>0x110000 || start>limit 294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ) { 295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FALSE; 296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(start==limit) { 298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return TRUE; /* nothing to do */ 299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) initialValue=trie->data[0]; 302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(start&UTRIE_MASK) { 303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 nextStart; 304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* set partial block at [start..following block boundary[ */ 306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) block=utrie_getDataBlock(trie, start); 307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(block<0) { 308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FALSE; 309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nextStart=(start+UTRIE_DATA_BLOCK_LENGTH)&~UTRIE_MASK; 312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(nextStart<=limit) { 313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie_fillBlock(trie->data+block, start&UTRIE_MASK, UTRIE_DATA_BLOCK_LENGTH, 314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value, initialValue, overwrite); 315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start=nextStart; 316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie_fillBlock(trie->data+block, start&UTRIE_MASK, limit&UTRIE_MASK, 318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value, initialValue, overwrite); 319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return TRUE; 320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* number of positions in the last, partial block */ 324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) rest=limit&UTRIE_MASK; 325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* round down limit to a block boundary */ 327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) limit&=~UTRIE_MASK; 328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* iterate over all-value blocks */ 330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(value==initialValue) { 331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) repeatBlock=0; 332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) repeatBlock=-1; 334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(start<limit) { 336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* get index value */ 337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) block=trie->index[start>>UTRIE_SHIFT]; 338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(block>0) { 339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* already allocated, fill in value */ 340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie_fillBlock(trie->data+block, 0, UTRIE_DATA_BLOCK_LENGTH, value, initialValue, overwrite); 341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(trie->data[-block]!=value && (block==0 || overwrite)) { 342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* set the repeatBlock instead of the current block 0 or range block */ 343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(repeatBlock>=0) { 344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->index[start>>UTRIE_SHIFT]=-repeatBlock; 345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* create and set and fill the repeatBlock */ 347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) repeatBlock=utrie_getDataBlock(trie, start); 348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(repeatBlock<0) { 349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FALSE; 350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* set the negative block number to indicate that it is a repeat block */ 353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->index[start>>UTRIE_SHIFT]=-repeatBlock; 354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie_fillBlock(trie->data+repeatBlock, 0, UTRIE_DATA_BLOCK_LENGTH, value, initialValue, TRUE); 355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start+=UTRIE_DATA_BLOCK_LENGTH; 359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(rest>0) { 362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* set partial block at [last block boundary..limit[ */ 363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) block=utrie_getDataBlock(trie, start); 364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(block<0) { 365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FALSE; 366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie_fillBlock(trie->data+block, 0, rest, value, initialValue, overwrite); 369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return TRUE; 372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t 375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)_findSameIndexBlock(const int32_t *idx, int32_t indexLength, 376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t otherBlock) { 377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t block, i; 378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(block=UTRIE_BMP_INDEX_LENGTH; block<indexLength; block+=UTRIE_SURROGATE_BLOCK_COUNT) { 380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=0; i<UTRIE_SURROGATE_BLOCK_COUNT; ++i) { 381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(idx[block+i]!=idx[otherBlock+i]) { 382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(i==UTRIE_SURROGATE_BLOCK_COUNT) { 386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return block; 387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return indexLength; 390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Fold the normalization data for supplementary code points into 394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * a compact area on top of the BMP-part of the trie index, 395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * with the lead surrogates indexing this compact area. 396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Duplicate the index values for lead surrogates: 398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * From inside the BMP area, where some may be overridden with folded values, 399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * to just after the BMP area, where they can be retrieved for 400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * code point lookups. 401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_fold(UNewTrie *trie, UNewTrieGetFoldedValue *getFoldedValue, UErrorCode *pErrorCode) { 404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t leadIndexes[UTRIE_SURROGATE_BLOCK_COUNT]; 405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t *idx; 406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t value; 407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c; 408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t indexLength, block; 409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE_DEBUG 410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int countLeadCUWithData=0; 411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) idx=trie->index; 414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* copy the lead surrogate indexes into a temporary array */ 416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(leadIndexes, idx+(0xd800>>UTRIE_SHIFT), 4*UTRIE_SURROGATE_BLOCK_COUNT); 417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * set all values for lead surrogate code *units* to leadUnitValue 420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * so that, by default, runtime lookups will find no data for associated 421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * supplementary code points, unless there is data for such code points 422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * which will result in a non-zero folding value below that is set for 423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the respective lead units 424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the above saved the indexes for surrogate code *points* 426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * fill the indexes with simplified code from utrie_setRange32() 427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie->leadUnitValue==trie->data[0]) { 429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) block=0; /* leadUnitValue==initialValue, use all-initial-value block */ 430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* create and fill the repeatBlock */ 432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) block=utrie_allocDataBlock(trie); 433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(block<0) { 434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* data table overflow */ 435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie_fillBlock(trie->data+block, 0, UTRIE_DATA_BLOCK_LENGTH, trie->leadUnitValue, trie->data[0], TRUE); 439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) block=-block; /* negative block number to indicate that it is a repeat block */ 440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(c=(0xd800>>UTRIE_SHIFT); c<(0xdc00>>UTRIE_SHIFT); ++c) { 442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->index[c]=block; 443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Fold significant index values into the area just after the BMP indexes. 447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * In case the first lead surrogate has significant data, 448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * its index block must be used first (in which case the folding is a no-op). 449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Later all folded index blocks are moved up one to insert the copied 450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * lead surrogate indexes. 451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) indexLength=UTRIE_BMP_INDEX_LENGTH; 453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* search for any index (stage 1) entries for supplementary code points */ 455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(c=0x10000; c<0x110000;) { 456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(idx[c>>UTRIE_SHIFT]!=0) { 457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* there is data, treat the full block for a lead surrogate */ 458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c&=~0x3ff; 459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE_DEBUG 461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++countLeadCUWithData; 462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* printf("supplementary data for lead surrogate U+%04lx\n", (long)(0xd7c0+(c>>10))); */ 463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* is there an identical index block? */ 466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) block=_findSameIndexBlock(idx, indexLength, c>>UTRIE_SHIFT); 467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * get a folded value for [c..c+0x400[ and, 470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * if different from the value for the lead surrogate code point, 471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * set it for the lead surrogate code unit 472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value=getFoldedValue(trie, c, block+UTRIE_SURROGATE_BLOCK_COUNT); 474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(value!=utrie_get32(trie, U16_LEAD(c), NULL)) { 475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!utrie_set32(trie, U16_LEAD(c), value)) { 476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* data table overflow */ 477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* if we did not find an identical index block... */ 482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(block==indexLength) { 483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* move the actual index (stage 1) entries from the supplementary position to the new one */ 484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memmove(idx+indexLength, 485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) idx+(c>>UTRIE_SHIFT), 486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4*UTRIE_SURROGATE_BLOCK_COUNT); 487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) indexLength+=UTRIE_SURROGATE_BLOCK_COUNT; 488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c+=0x400; 491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c+=UTRIE_DATA_BLOCK_LENGTH; 493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE_DEBUG 496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(countLeadCUWithData>0) { 497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printf("supplementary data for %d lead surrogates\n", countLeadCUWithData); 498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * index array overflow? 503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * This is to guarantee that a folding offset is of the form 504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023. 505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * If the index is too large, then n>=1024 and more than 10 bits are necessary. 506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * In fact, it can only ever become n==1024 with completely unfoldable data and 508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the additional block of duplicated values for lead surrogates. 509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(indexLength>=UTRIE_MAX_INDEX_LENGTH) { 511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * make space for the lead surrogate index block and 517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * insert it between the BMP indexes and the folded ones 518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memmove(idx+UTRIE_BMP_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT, 520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) idx+UTRIE_BMP_INDEX_LENGTH, 521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4*(indexLength-UTRIE_BMP_INDEX_LENGTH)); 522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(idx+UTRIE_BMP_INDEX_LENGTH, 523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) leadIndexes, 524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 4*UTRIE_SURROGATE_BLOCK_COUNT); 525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) indexLength+=UTRIE_SURROGATE_BLOCK_COUNT; 526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE_DEBUG 528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printf("trie index count: BMP %ld all Unicode %ld folded %ld\n", 529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTRIE_BMP_INDEX_LENGTH, (long)UTRIE_MAX_INDEX_LENGTH, indexLength); 530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->indexLength=indexLength; 533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Set a value in the trie index map to indicate which data block 537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * is referenced and which one is not. 538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * utrie_compact() will remove data blocks that are not used at all. 539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Set 540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - 0 if it is used 541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - -1 if it is not used 542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)_findUnusedBlocks(UNewTrie *trie) { 545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i; 546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* fill the entire map with "not used" */ 548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memset(trie->map, 0xff, (UTRIE_MAX_BUILD_TIME_DATA_LENGTH>>UTRIE_SHIFT)*4); 549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* mark each block that _is_ used with 0 */ 551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=0; i<trie->indexLength; ++i) { 552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->map[ABS(trie->index[i])>>UTRIE_SHIFT]=0; 553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* never move the all-initial-value block 0 */ 556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->map[0]=0; 557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t 560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)_findSameDataBlock(const uint32_t *data, int32_t dataLength, 561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t otherBlock, int32_t step) { 562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t block; 563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* ensure that we do not even partially get past dataLength */ 565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dataLength-=UTRIE_DATA_BLOCK_LENGTH; 566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(block=0; block<=dataLength; block+=step) { 568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(equal_uint32(data+block, data+otherBlock, UTRIE_DATA_BLOCK_LENGTH)) { 569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return block; 570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Compact a folded build-time trie. 577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The compaction 579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - removes blocks that are identical with earlier ones 580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - overlaps adjacent blocks as much as possible (if overlap==TRUE) 581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - moves blocks in steps of the data granularity 582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - moves and overlaps blocks that overlap with multiple values in the overlap region 583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * It does not 585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - try to move and overlap blocks that are not already adjacent 586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_compact(UNewTrie *trie, UBool overlap, UErrorCode *pErrorCode) { 589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i, start, newStart, overlapStart; 590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* valid, uncompacted trie? */ 596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie==NULL) { 597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie->isCompacted) { 601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; /* nothing left to do */ 602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* compaction */ 605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* initialize the index map with "block is used/unused" flags */ 607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) _findUnusedBlocks(trie); 608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* if Latin-1 is preallocated and linear, then do not compact Latin-1 data */ 610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie->isLatin1Linear && UTRIE_SHIFT<=8) { 611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) overlapStart=UTRIE_DATA_BLOCK_LENGTH+256; 612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) overlapStart=UTRIE_DATA_BLOCK_LENGTH; 614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newStart=UTRIE_DATA_BLOCK_LENGTH; 617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(start=newStart; start<trie->dataLength;) { 618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * start: index of first entry of current block 620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * newStart: index where the current block is to be moved 621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * (right after current end of already-compacted data) 622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* skip blocks that are not used */ 625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie->map[start>>UTRIE_SHIFT]<0) { 626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* advance start to the next block */ 627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start+=UTRIE_DATA_BLOCK_LENGTH; 628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* leave newStart with the previous block! */ 630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* search for an identical block */ 634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if( start>=overlapStart && 635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (i=_findSameDataBlock(trie->data, newStart, start, 636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) overlap ? UTRIE_DATA_GRANULARITY : UTRIE_DATA_BLOCK_LENGTH)) 637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) >=0 638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ) { 639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* found an identical block, set the other block's index value for the current block */ 640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->map[start>>UTRIE_SHIFT]=i; 641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* advance start to the next block */ 643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start+=UTRIE_DATA_BLOCK_LENGTH; 644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* leave newStart with the previous block! */ 646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* see if the beginning of this block can be overlapped with the end of the previous block */ 650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(overlap && start>=overlapStart) { 651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* look for maximum overlap (modulo granularity) with the previous, adjacent block */ 652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=UTRIE_DATA_BLOCK_LENGTH-UTRIE_DATA_GRANULARITY; 653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i>0 && !equal_uint32(trie->data+(newStart-i), trie->data+start, i); 654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i-=UTRIE_DATA_GRANULARITY) {} 655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i=0; 657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(i>0) { 660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* some overlap */ 661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->map[start>>UTRIE_SHIFT]=newStart-i; 662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* move the non-overlapping indexes to their new positions */ 664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start+=i; 665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=UTRIE_DATA_BLOCK_LENGTH-i; i>0; --i) { 666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->data[newStart++]=trie->data[start++]; 667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(newStart<start) { 669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* no overlap, just move the indexes to their new positions */ 670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->map[start>>UTRIE_SHIFT]=newStart; 671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=UTRIE_DATA_BLOCK_LENGTH; i>0; --i) { 672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->data[newStart++]=trie->data[start++]; 673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else /* no overlap && newStart==start */ { 675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->map[start>>UTRIE_SHIFT]=start; 676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newStart+=UTRIE_DATA_BLOCK_LENGTH; 677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start=newStart; 678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* now adjust the index (stage 1) table */ 682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=0; i<trie->indexLength; ++i) { 683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->index[i]=trie->map[ABS(trie->index[i])>>UTRIE_SHIFT]; 684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE_DEBUG 687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* we saved some space */ 688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printf("compacting trie: count of 32-bit words %lu->%lu\n", 689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (long)trie->dataLength, (long)newStart); 690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->dataLength=newStart; 693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* serialization ------------------------------------------------------------ */ 696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Default function for the folding value: 699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Just store the offset (16 bits) if there is any non-initial-value entry. 700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The offset parameter is never 0. 702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Returning the offset itself is safe for UTRIE_SHIFT>=5 because 703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * for UTRIE_SHIFT==5 the maximum index length is UTRIE_MAX_INDEX_LENGTH==0x8800 704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * which fits into 16-bit trie values; 705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * for higher UTRIE_SHIFT, UTRIE_MAX_INDEX_LENGTH decreases. 706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Theoretically, it would be safer for all possible UTRIE_SHIFT including 708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * those of 4 and lower to return offset>>UTRIE_SURROGATE_BLOCK_BITS 709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * which would always result in a value of 0x40..0x43f 710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * (start/end 1k blocks of supplementary Unicode code points). 711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * However, this would be uglier, and would not work for some existing 712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * binary data file formats. 713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Also, we do not plan to change UTRIE_SHIFT because it would change binary 715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * data file formats, and we would probably not make it smaller because of 716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the then even larger BMP index length even for empty tries. 717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static uint32_t U_CALLCONV 719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)defaultGetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) { 720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t value, initialValue; 721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 limit; 722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool inBlockZero; 723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) initialValue=trie->data[0]; 725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) limit=start+0x400; 726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(start<limit) { 727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value=utrie_get32(trie, start, &inBlockZero); 728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(inBlockZero) { 729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start+=UTRIE_DATA_BLOCK_LENGTH; 730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(value!=initialValue) { 731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return (uint32_t)offset; 732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++start; 734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2 740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_serialize(UNewTrie *trie, void *dt, int32_t capacity, 741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UNewTrieGetFoldedValue *getFoldedValue, 742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool reduceTo16Bits, 743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTrieHeader *header; 745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t *p; 746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint16_t *dest16; 747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i, length; 748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t* data = NULL; 749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* argument check */ 751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie==NULL || capacity<0 || (capacity>0 && dt==NULL)) { 756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(getFoldedValue==NULL) { 760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) getFoldedValue=defaultGetFoldedValue; 761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) data = (uint8_t*)dt; 764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* fold and compact if necessary, also checks that indexLength is within limits */ 765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!trie->isCompacted) { 766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* compact once without overlap to improve folding */ 767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie_compact(trie, FALSE, pErrorCode); 768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* fold the supplementary part of the index array */ 770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie_fold(trie, getFoldedValue, pErrorCode); 771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* compact again with overlap for minimum data array length */ 773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie_compact(trie, TRUE, pErrorCode); 774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->isCompacted=TRUE; 776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*pErrorCode)) { 777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* is dataLength within limits? */ 782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if( (reduceTo16Bits ? (trie->dataLength+trie->indexLength) : trie->dataLength) >= UTRIE_MAX_DATA_LENGTH) { 783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) length=sizeof(UTrieHeader)+2*trie->indexLength; 787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(reduceTo16Bits) { 788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) length+=2*trie->dataLength; 789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) length+=4*trie->dataLength; 791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(length>capacity) { 794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return length; /* preflighting */ 795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UTRIE_DEBUG 798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) printf("**UTrieLengths(serialize)** index:%6ld data:%6ld serialized:%6ld\n", 799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (long)trie->indexLength, (long)trie->dataLength, (long)length); 800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* set the header fields */ 803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) header=(UTrieHeader *)data; 804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) data+=sizeof(UTrieHeader); 805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) header->signature=0x54726965; /* "Trie" */ 807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) header->options=UTRIE_SHIFT | (UTRIE_INDEX_SHIFT<<UTRIE_OPTIONS_INDEX_SHIFT); 808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!reduceTo16Bits) { 810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) header->options|=UTRIE_OPTIONS_DATA_IS_32_BIT; 811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie->isLatin1Linear) { 813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) header->options|=UTRIE_OPTIONS_LATIN1_IS_LINEAR; 814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) header->indexLength=trie->indexLength; 817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) header->dataLength=trie->dataLength; 818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write the index (stage 1) array and the 16/32-bit data (stage 2) array */ 820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(reduceTo16Bits) { 821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT, after adding indexLength */ 822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p=(uint32_t *)trie->index; 823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dest16=(uint16_t *)data; 824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=trie->indexLength; i>0; --i) { 825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *dest16++=(uint16_t)((*p++ + trie->indexLength)>>UTRIE_INDEX_SHIFT); 826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write 16-bit data values */ 829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p=trie->data; 830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=trie->dataLength; i>0; --i) { 831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *dest16++=(uint16_t)*p++; 832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT */ 835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p=(uint32_t *)trie->index; 836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dest16=(uint16_t *)data; 837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=trie->indexLength; i>0; --i) { 838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *dest16++=(uint16_t)(*p++ >> UTRIE_INDEX_SHIFT); 839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 840f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 841f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write 32-bit data values */ 842f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(dest16, trie->data, 4*trie->dataLength); 843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return length; 846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* inverse to defaultGetFoldedValue() */ 849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2 850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_defaultGetFoldingOffset(uint32_t data) { 851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return (int32_t)data; 852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2 855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_unserialize(UTrie *trie, const void *data, int32_t length, UErrorCode *pErrorCode) { 856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UTrieHeader *header; 857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint16_t *p16; 858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t options; 859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* enough data for a trie header? */ 865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(length<sizeof(UTrieHeader)) { 866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_INVALID_FORMAT_ERROR; 867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* check the signature */ 871f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) header=(const UTrieHeader *)data; 872f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(header->signature!=0x54726965) { 873f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_INVALID_FORMAT_ERROR; 874f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 875f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 876f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 877f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* get the options and check the shift values */ 878f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) options=header->options; 879f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if( (options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT || 880f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ((options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT 881f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ) { 882f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_INVALID_FORMAT_ERROR; 883f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 884f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 885f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->isLatin1Linear= (UBool)((options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0); 886f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 887f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* get the length values */ 888f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->indexLength=header->indexLength; 889f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->dataLength=header->dataLength; 890f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 891f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) length-=(int32_t)sizeof(UTrieHeader); 892f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 893f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* enough data for the index? */ 894f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(length<2*trie->indexLength) { 895f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_INVALID_FORMAT_ERROR; 896f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 897f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 898f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p16=(const uint16_t *)(header+1); 899f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->index=p16; 900f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p16+=trie->indexLength; 901f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) length-=2*trie->indexLength; 902f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 903f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* get the data */ 904f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(options&UTRIE_OPTIONS_DATA_IS_32_BIT) { 905f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(length<4*trie->dataLength) { 906f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_INVALID_FORMAT_ERROR; 907f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 908f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 909f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->data32=(const uint32_t *)p16; 910f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->initialValue=trie->data32[0]; 911f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) length=(int32_t)sizeof(UTrieHeader)+2*trie->indexLength+4*trie->dataLength; 912f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 913f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(length<2*trie->dataLength) { 914f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_INVALID_FORMAT_ERROR; 915f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 916f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 917f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 918f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* the "data16" data is used via the index pointer */ 919f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->data32=NULL; 920f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->initialValue=trie->index[trie->indexLength]; 921f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) length=(int32_t)sizeof(UTrieHeader)+2*trie->indexLength+2*trie->dataLength; 922f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 923f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 924f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->getFoldingOffset=utrie_defaultGetFoldingOffset; 925f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 926f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return length; 927f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 928f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 929f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI int32_t U_EXPORT2 930f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_unserializeDummy(UTrie *trie, 931f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) void *data, int32_t length, 932f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t initialValue, uint32_t leadUnitValue, 933f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool make16BitTrie, 934f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 935f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint16_t *p16; 936f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t actualLength, latin1Length, i, limit; 937f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint16_t block; 938f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 939f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 940f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 941f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 942f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 943f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* calculate the actual size of the dummy trie data */ 944f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 945f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* max(Latin-1, block 0) */ 946f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) latin1Length= UTRIE_SHIFT<=8 ? 256 : UTRIE_DATA_BLOCK_LENGTH; 947f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 948f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->indexLength=UTRIE_BMP_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT; 949f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->dataLength=latin1Length; 950f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(leadUnitValue!=initialValue) { 951f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->dataLength+=UTRIE_DATA_BLOCK_LENGTH; 952f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 953f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 954f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) actualLength=trie->indexLength*2; 955f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(make16BitTrie) { 956f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) actualLength+=trie->dataLength*2; 957f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 958f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) actualLength+=trie->dataLength*4; 959f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 960f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 961f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* enough space for the dummy trie? */ 962f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(length<actualLength) { 963f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 964f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return actualLength; 965f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 966f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 967f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->isLatin1Linear=TRUE; 968f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->initialValue=initialValue; 969f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 970f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* fill the index and data arrays */ 971f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p16=(uint16_t *)data; 972f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->index=p16; 973f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 974f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(make16BitTrie) { 975f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* indexes to block 0 */ 976f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) block=(uint16_t)(trie->indexLength>>UTRIE_INDEX_SHIFT); 977f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) limit=trie->indexLength; 978f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=0; i<limit; ++i) { 979f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p16[i]=block; 980f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 981f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 982f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(leadUnitValue!=initialValue) { 983f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* indexes for lead surrogate code units to the block after Latin-1 */ 984f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) block+=(uint16_t)(latin1Length>>UTRIE_INDEX_SHIFT); 985f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i=0xd800>>UTRIE_SHIFT; 986f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) limit=0xdc00>>UTRIE_SHIFT; 987f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(; i<limit; ++i) { 988f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p16[i]=block; 989f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 990f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 991f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 992f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->data32=NULL; 993f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 994f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Latin-1 data */ 995f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p16+=trie->indexLength; 996f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=0; i<latin1Length; ++i) { 997f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p16[i]=(uint16_t)initialValue; 998f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 999f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1000f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* data for lead surrogate code units */ 1001f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(leadUnitValue!=initialValue) { 1002f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) limit=latin1Length+UTRIE_DATA_BLOCK_LENGTH; 1003f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(/* i=latin1Length */; i<limit; ++i) { 1004f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p16[i]=(uint16_t)leadUnitValue; 1005f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1006f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1007f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1008f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t *p32; 1009f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1010f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* indexes to block 0 */ 1011f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memset(p16, 0, trie->indexLength*2); 1012f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1013f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(leadUnitValue!=initialValue) { 1014f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* indexes for lead surrogate code units to the block after Latin-1 */ 1015f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) block=(uint16_t)(latin1Length>>UTRIE_INDEX_SHIFT); 1016f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i=0xd800>>UTRIE_SHIFT; 1017f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) limit=0xdc00>>UTRIE_SHIFT; 1018f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(; i<limit; ++i) { 1019f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p16[i]=block; 1020f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1021f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1022f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1023f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->data32=p32=(uint32_t *)(p16+trie->indexLength); 1024f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1025f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Latin-1 data */ 1026f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=0; i<latin1Length; ++i) { 1027f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p32[i]=initialValue; 1028f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1029f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1030f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* data for lead surrogate code units */ 1031f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(leadUnitValue!=initialValue) { 1032f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) limit=latin1Length+UTRIE_DATA_BLOCK_LENGTH; 1033f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(/* i=latin1Length */; i<limit; ++i) { 1034f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) p32[i]=leadUnitValue; 1035f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1036f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1037f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1038f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1039f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trie->getFoldingOffset=utrie_defaultGetFoldingOffset; 1040f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1041f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return actualLength; 1042f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1043f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1044f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* enumeration -------------------------------------------------------------- */ 1045f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1046f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* default UTrieEnumValue() returns the input value itself */ 1047f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static uint32_t U_CALLCONV 1048f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)enumSameValue(const void *context, uint32_t value) { 1049f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return value; 1050f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1051f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1052f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 1053f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Enumerate all ranges of code points with the same relevant values. 1054f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The values are transformed from the raw trie entries by the enumValue function. 1055f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 1056f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2 1057f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)utrie_enum(const UTrie *trie, 1058f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTrieEnumValue *enumValue, UTrieEnumRange *enumRange, const void *context) { 1059f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint32_t *data32; 1060f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint16_t *idx; 1061f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1062f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t value, prevValue, initialValue; 1063f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c, prev; 1064f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t l, i, j, block, prevBlock, nullBlock, offset; 1065f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1066f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* check arguments */ 1067f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(trie==NULL || trie->index==NULL || enumRange==NULL) { 1068f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1069f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1070f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(enumValue==NULL) { 1071f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) enumValue=enumSameValue; 1072f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1073f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1074f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) idx=trie->index; 1075f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) data32=trie->data32; 1076f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1077f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* get the enumeration value that corresponds to an initial-value trie data entry */ 1078f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) initialValue=enumValue(context, trie->initialValue); 1079f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1080f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(data32==NULL) { 1081f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nullBlock=trie->indexLength; 1082f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1083f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nullBlock=0; 1084f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1085f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1086f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* set variables for previous range */ 1087f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevBlock=nullBlock; 1088f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prev=0; 1089f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevValue=initialValue; 1090f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1091f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* enumerate BMP - the main loop enumerates data blocks */ 1092f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=0, c=0; c<=0xffff; ++i) { 1093f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(c==0xd800) { 1094f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* skip lead surrogate code _units_, go to lead surr. code _points_ */ 1095f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i=UTRIE_BMP_INDEX_LENGTH; 1096f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(c==0xdc00) { 1097f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* go back to regular BMP code points */ 1098f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i=c>>UTRIE_SHIFT; 1099f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) block=idx[i]<<UTRIE_INDEX_SHIFT; 1102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(block==prevBlock) { 1103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* the block is the same as the previous one, and filled with value */ 1104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c+=UTRIE_DATA_BLOCK_LENGTH; 1105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(block==nullBlock) { 1106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* this is the all-initial-value block */ 1107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(prevValue!=initialValue) { 1108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(prev<c) { 1109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!enumRange(context, prev, c, prevValue)) { 1110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevBlock=nullBlock; 1114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prev=c; 1115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevValue=initialValue; 1116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c+=UTRIE_DATA_BLOCK_LENGTH; 1118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevBlock=block; 1120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(j=0; j<UTRIE_DATA_BLOCK_LENGTH; ++j) { 1121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]); 1122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(value!=prevValue) { 1123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(prev<c) { 1124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!enumRange(context, prev, c, prevValue)) { 1125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(j>0) { 1129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* the block is not filled with all the same value */ 1130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevBlock=-1; 1131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prev=c; 1133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevValue=value; 1134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++c; 1136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* enumerate supplementary code points */ 1141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(l=0xd800; l<0xdc00;) { 1142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* lead surrogate access */ 1143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) offset=idx[l>>UTRIE_SHIFT]<<UTRIE_INDEX_SHIFT; 1144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(offset==nullBlock) { 1145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* no entries for a whole block of lead surrogates */ 1146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(prevValue!=initialValue) { 1147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(prev<c) { 1148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!enumRange(context, prev, c, prevValue)) { 1149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevBlock=nullBlock; 1153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prev=c; 1154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevValue=initialValue; 1155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) l+=UTRIE_DATA_BLOCK_LENGTH; 1158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c+=UTRIE_DATA_BLOCK_LENGTH<<10; 1159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 1160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value= data32!=NULL ? data32[offset+(l&UTRIE_MASK)] : idx[offset+(l&UTRIE_MASK)]; 1163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* enumerate trail surrogates for this lead surrogate */ 1165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) offset=trie->getFoldingOffset(value); 1166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(offset<=0) { 1167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* no data for this lead surrogate */ 1168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(prevValue!=initialValue) { 1169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(prev<c) { 1170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!enumRange(context, prev, c, prevValue)) { 1171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevBlock=nullBlock; 1175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prev=c; 1176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevValue=initialValue; 1177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* nothing else to do for the supplementary code points for this lead surrogate */ 1180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c+=0x400; 1181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* enumerate code points for this lead surrogate */ 1183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i=offset; 1184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) offset+=UTRIE_SURROGATE_BLOCK_COUNT; 1185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) do { 1186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* copy of most of the body of the BMP loop */ 1187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) block=idx[i]<<UTRIE_INDEX_SHIFT; 1188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(block==prevBlock) { 1189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* the block is the same as the previous one, and filled with value */ 1190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c+=UTRIE_DATA_BLOCK_LENGTH; 1191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(block==nullBlock) { 1192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* this is the all-initial-value block */ 1193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(prevValue!=initialValue) { 1194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(prev<c) { 1195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!enumRange(context, prev, c, prevValue)) { 1196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevBlock=nullBlock; 1200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prev=c; 1201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevValue=initialValue; 1202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c+=UTRIE_DATA_BLOCK_LENGTH; 1204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevBlock=block; 1206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(j=0; j<UTRIE_DATA_BLOCK_LENGTH; ++j) { 1207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]); 1208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(value!=prevValue) { 1209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(prev<c) { 1210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!enumRange(context, prev, c, prevValue)) { 1211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(j>0) { 1215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* the block is not filled with all the same value */ 1216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevBlock=-1; 1217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prev=c; 1219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) prevValue=value; 1220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++c; 1222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } while(++i<offset); 1225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++l; 1228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* deliver last range */ 1231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) enumRange(context, prev, c, prevValue); 1232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1233