185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* 285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho****************************************************************************** 385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* 427f654740f2a26ad62a5c155af9199af9e69b889claireho* Copyright (C) 2001-2010, International Business Machines 585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* Corporation and others. All Rights Reserved. 685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* 785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho****************************************************************************** 885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* file name: utrie2_builder.c 985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* encoding: US-ASCII 1085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* tab size: 8 (not used) 1185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* indentation:4 1285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* 1385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* created on: 2008sep26 (split off from utrie2.c) 1485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* created by: Markus W. Scherer 1585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* 1685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* This is a common implementation of a Unicode trie. 1785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* It is a kind of compressed, serializable table of 16- or 32-bit values associated with 1885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* Unicode code points (0..0x10ffff). 1985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* This is the second common version of a Unicode trie (hence the name UTrie2). 2085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* See utrie2.h for a comparison. 2185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* 2285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* This file contains only the builder code. 2385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* See utrie2.c for the runtime and enumeration code. 2485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho*/ 2585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#ifdef UTRIE2_DEBUG 2685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho# include <stdio.h> 2785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#endif 2885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 2985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#include "unicode/utypes.h" 3085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#include "cmemory.h" 3185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#include "utrie2.h" 3285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#include "utrie2_impl.h" 3385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 3427f654740f2a26ad62a5c155af9199af9e69b889claireho#include "utrie.h" /* for utrie2_fromUTrie() and utrie_swap() */ 3585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 3685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 3785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 3885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* Implementation notes ----------------------------------------------------- */ 3985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 4085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* 4185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * The UTRIE2_SHIFT_1, UTRIE2_SHIFT_2, UTRIE2_INDEX_SHIFT and other values 4285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * have been chosen to minimize trie sizes overall. 4385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Most of the code is flexible enough to work with a range of values, 4485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * within certain limits. 4585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 4685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Exception: Support for separate values for lead surrogate code _units_ 4785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * vs. code _points_ was added after the constants were fixed, 4885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * and has not been tested nor particularly designed for different constant values. 4985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * (Especially the utrie2_enum() code that jumps to the special LSCP index-2 5085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * part and back.) 5185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 5285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Requires UTRIE2_SHIFT_2<=6. Otherwise 0xc0 which is the top of the ASCII-linear data 5385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * including the bad-UTF-8-data block is not a multiple of UTRIE2_DATA_BLOCK_LENGTH 5485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * and map[block>>UTRIE2_SHIFT_2] (used in reference counting and compaction 5585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * remapping) stops working. 5685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 5785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Requires UTRIE2_SHIFT_1>=10 because utrie2_enumForLeadSurrogate() 5885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * assumes that a single index-2 block is used for 0x400 code points 5985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * corresponding to one lead surrogate. 6085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 6185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Requires UTRIE2_SHIFT_1<=16. Otherwise one single index-2 block contains 6285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * more than one Unicode plane, and the split of the index-2 table into a BMP 6385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * part and a supplementary part, with a gap in between, would not work. 6485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 6585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Requires UTRIE2_INDEX_SHIFT>=1 not because of the code but because 6685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * there is data with more than 64k distinct values, 6785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * for example for Unihan collation with a separate collation weight per 6885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Han character. 6985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 7085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 7185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* Building a trie ----------------------------------------------------------*/ 7285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 7385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hoenum { 7485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /** The null index-2 block, following the gap in the index-2 table. */ 7585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UNEWTRIE2_INDEX_2_NULL_OFFSET=UNEWTRIE2_INDEX_GAP_OFFSET+UNEWTRIE2_INDEX_GAP_LENGTH, 7685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 7785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /** The start of allocated index-2 blocks. */ 7885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UNEWTRIE2_INDEX_2_START_OFFSET=UNEWTRIE2_INDEX_2_NULL_OFFSET+UTRIE2_INDEX_2_BLOCK_LENGTH, 7985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 8085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /** 8185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * The null data block. 8285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Length 64=0x40 even if UTRIE2_DATA_BLOCK_LENGTH is smaller, 8385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * to work with 6-bit trail bytes from 2-byte UTF-8. 8485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 8585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UNEWTRIE2_DATA_NULL_OFFSET=UTRIE2_DATA_START_OFFSET, 8685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 8785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /** The start of allocated data blocks. */ 8885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UNEWTRIE2_DATA_START_OFFSET=UNEWTRIE2_DATA_NULL_OFFSET+0x40, 8985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 9085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /** 9185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * The start of data blocks for U+0800 and above. 9285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Below, compaction uses a block length of 64 for 2-byte UTF-8. 9385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * From here on, compaction uses UTRIE2_DATA_BLOCK_LENGTH. 9485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Data values for 0x780 code points beyond ASCII. 9585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 9685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UNEWTRIE2_DATA_0800_OFFSET=UNEWTRIE2_DATA_START_OFFSET+0x780 9785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho}; 9885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 9985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* Start with allocation of 16k data entries. */ 10085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#define UNEWTRIE2_INITIAL_DATA_LENGTH ((int32_t)1<<14) 10185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 10285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* Grow about 8x each time. */ 10385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#define UNEWTRIE2_MEDIUM_DATA_LENGTH ((int32_t)1<<17) 10485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 10585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic int32_t 10685bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoallocIndex2Block(UNewTrie2 *trie); 10785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 10885bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI UTrie2 * U_EXPORT2 10985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houtrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) { 11085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UTrie2 *trie; 11185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UNewTrie2 *newTrie; 11285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t *data; 11385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t i, j; 11485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 11585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(U_FAILURE(*pErrorCode)) { 11685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return NULL; 11785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 11885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 11985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); 12085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2)); 12185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho data=(uint32_t *)uprv_malloc(UNEWTRIE2_INITIAL_DATA_LENGTH*4); 12285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(trie==NULL || newTrie==NULL || data==NULL) { 12385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_free(trie); 12485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_free(newTrie); 12585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_free(data); 12685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 12785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return 0; 12885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 12985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 13085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_memset(trie, 0, sizeof(UTrie2)); 13185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->initialValue=initialValue; 13285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->errorValue=errorValue; 13385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->highStart=0x110000; 13485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->newTrie=newTrie; 13585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 13685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->data=data; 13785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->dataCapacity=UNEWTRIE2_INITIAL_DATA_LENGTH; 13885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->initialValue=initialValue; 13985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->errorValue=errorValue; 14085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->highStart=0x110000; 14185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->firstFreeBlock=0; /* no free block in the list */ 14285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->isCompacted=FALSE; 14385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 14485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 14585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * preallocate and reset 14685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - ASCII 14785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - the bad-UTF-8-data block 14885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - the null data block 14985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 15085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(i=0; i<0x80; ++i) { 15185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->data[i]=initialValue; 15285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 15385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(; i<0xc0; ++i) { 15485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->data[i]=errorValue; 15585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 15685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(i=UNEWTRIE2_DATA_NULL_OFFSET; i<UNEWTRIE2_DATA_START_OFFSET; ++i) { 15785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->data[i]=initialValue; 15885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 15985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->dataNullOffset=UNEWTRIE2_DATA_NULL_OFFSET; 16085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->dataLength=UNEWTRIE2_DATA_START_OFFSET; 16185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 16285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* set the index-2 indexes for the 2=0x80>>UTRIE2_SHIFT_2 ASCII data blocks */ 16385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(i=0, j=0; j<0x80; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) { 16485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->index2[i]=j; 16585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->map[i]=1; 16685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 16785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* reference counts for the bad-UTF-8-data block */ 16885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(; j<0xc0; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) { 16985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->map[i]=0; 17085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 17185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 17285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Reference counts for the null data block: all blocks except for the ASCII blocks. 17385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Plus 1 so that we don't drop this block during compaction. 17485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Plus as many as needed for lead surrogate code points. 17585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 17685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* i==newTrie->dataNullOffset */ 17785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->map[i++]= 17885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho (0x110000>>UTRIE2_SHIFT_2)- 17985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho (0x80>>UTRIE2_SHIFT_2)+ 18085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 1+ 18185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UTRIE2_LSCP_INDEX_2_LENGTH; 18285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho j+=UTRIE2_DATA_BLOCK_LENGTH; 18385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(; j<UNEWTRIE2_DATA_START_OFFSET; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) { 18485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->map[i]=0; 18585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 18685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 18785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 18885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * set the remaining indexes in the BMP index-2 block 18985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * to the null data block 19085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 19185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(i=0x80>>UTRIE2_SHIFT_2; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) { 19285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->index2[i]=UNEWTRIE2_DATA_NULL_OFFSET; 19385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 19485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 19585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 19685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Fill the index gap with impossible values so that compaction 19785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * does not overlap other index-2 blocks with the gap. 19885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 19985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(i=0; i<UNEWTRIE2_INDEX_GAP_LENGTH; ++i) { 20085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->index2[UNEWTRIE2_INDEX_GAP_OFFSET+i]=-1; 20185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 20285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 20385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* set the indexes in the null index-2 block */ 20485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(i=0; i<UTRIE2_INDEX_2_BLOCK_LENGTH; ++i) { 20585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->index2[UNEWTRIE2_INDEX_2_NULL_OFFSET+i]=UNEWTRIE2_DATA_NULL_OFFSET; 20685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 20785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->index2NullOffset=UNEWTRIE2_INDEX_2_NULL_OFFSET; 20885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->index2Length=UNEWTRIE2_INDEX_2_START_OFFSET; 20985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 21085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* set the index-1 indexes for the linear index-2 block */ 21185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(i=0, j=0; 21285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho i<UTRIE2_OMITTED_BMP_INDEX_1_LENGTH; 21385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ++i, j+=UTRIE2_INDEX_2_BLOCK_LENGTH 21485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ) { 21585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->index1[i]=j; 21685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 21785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 21885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* set the remaining index-1 indexes to the null index-2 block */ 21985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) { 22085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->index1[i]=UNEWTRIE2_INDEX_2_NULL_OFFSET; 22185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 22285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 22385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 22485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Preallocate and reset data for U+0080..U+07ff, 22585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * for 2-byte UTF-8 which will be compacted in 64-blocks 22685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * even if UTRIE2_DATA_BLOCK_LENGTH is smaller. 22785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 22885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(i=0x80; i<0x800; i+=UTRIE2_DATA_BLOCK_LENGTH) { 22985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho utrie2_set32(trie, i, initialValue, pErrorCode); 23085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 23185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 23285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return trie; 23385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 23485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 23585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic UNewTrie2 * 23685bf2e2fbc60a9f938064abc8127d61da7d19882Claire HocloneBuilder(const UNewTrie2 *other) { 23785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UNewTrie2 *trie; 23885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 23985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2)); 24085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(trie==NULL) { 24185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return NULL; 24285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 24385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 24485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->data=(uint32_t *)uprv_malloc(other->dataCapacity*4); 24585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(trie->data==NULL) { 24685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_free(trie); 24785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return NULL; 24885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 24985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->dataCapacity=other->dataCapacity; 25085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 25185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* clone data */ 25285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_memcpy(trie->index1, other->index1, sizeof(trie->index1)); 25385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_memcpy(trie->index2, other->index2, other->index2Length*4); 25485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->index2NullOffset=other->index2NullOffset; 25585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->index2Length=other->index2Length; 25685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 25785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_memcpy(trie->data, other->data, other->dataLength*4); 25885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->dataNullOffset=other->dataNullOffset; 25985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->dataLength=other->dataLength; 26085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 26185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* reference counters */ 26285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(other->isCompacted) { 26385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->firstFreeBlock=0; 26485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 26585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_memcpy(trie->map, other->map, (other->dataLength>>UTRIE2_SHIFT_2)*4); 26685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->firstFreeBlock=other->firstFreeBlock; 26785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 26885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 26985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->initialValue=other->initialValue; 27085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->errorValue=other->errorValue; 27185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->highStart=other->highStart; 27285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->isCompacted=other->isCompacted; 27385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 27485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return trie; 27585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 27685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 27785bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI UTrie2 * U_EXPORT2 27885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houtrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode) { 27985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UTrie2 *trie; 28085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 28185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(U_FAILURE(*pErrorCode)) { 28285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return NULL; 28385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 28485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) { 28585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 28685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return NULL; 28785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 28885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 28985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); 29085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(trie==NULL) { 29185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return NULL; 29285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 29385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_memcpy(trie, other, sizeof(UTrie2)); 29485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 29585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(other->memory!=NULL) { 29685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->memory=uprv_malloc(other->length); 29785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(trie->memory!=NULL) { 29885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->isMemoryOwned=TRUE; 29985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_memcpy(trie->memory, other->memory, other->length); 30085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 30185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* make the clone's pointers point to its own memory */ 30285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->index=(uint16_t *)trie->memory+(other->index-(uint16_t *)other->memory); 30385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(other->data16!=NULL) { 30485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->data16=(uint16_t *)trie->memory+(other->data16-(uint16_t *)other->memory); 30585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 30685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(other->data32!=NULL) { 30785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->data32=(uint32_t *)trie->memory+(other->data32-(uint32_t *)other->memory); 30885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 30985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 31085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else /* other->newTrie!=NULL */ { 31185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->newTrie=cloneBuilder(other->newTrie); 31285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 31385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 31485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(trie->memory==NULL && trie->newTrie==NULL) { 31585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_free(trie); 31685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie=NULL; 31785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 31885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return trie; 31985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 32085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 32185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hotypedef struct NewTrieAndStatus { 32285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UTrie2 *trie; 32385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UErrorCode errorCode; 32485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UBool exclusiveLimit; /* rather than inclusive range end */ 32585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} NewTrieAndStatus; 32685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 32785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic UBool U_CALLCONV 32885bf2e2fbc60a9f938064abc8127d61da7d19882Claire HocopyEnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { 32985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho NewTrieAndStatus *nt=(NewTrieAndStatus *)context; 33085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(value!=nt->trie->initialValue) { 33185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(nt->exclusiveLimit) { 33285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho --end; 33385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 33485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(start==end) { 33585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho utrie2_set32(nt->trie, start, value, &nt->errorCode); 33685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 33785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho utrie2_setRange32(nt->trie, start, end, value, TRUE, &nt->errorCode); 33885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 33985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return U_SUCCESS(nt->errorCode); 34085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 34185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return TRUE; 34285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 34385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 34485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 34585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#ifdef UTRIE2_DEBUG 34685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void 34785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houtrie_printLengths(const UTrie *trie) { 34885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho long indexLength=trie->indexLength; 34985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho long dataLength=(long)trie->dataLength; 35085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho long totalLength=(long)sizeof(UTrieHeader)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2); 35185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho printf("**UTrieLengths** index:%6ld data:%6ld serialized:%6ld\n", 35285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho indexLength, dataLength, totalLength); 35385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 35485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 35585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void 35685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houtrie2_printLengths(const UTrie2 *trie, const char *which) { 35785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho long indexLength=trie->indexLength; 35885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho long dataLength=(long)trie->dataLength; 35985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho long totalLength=(long)sizeof(UTrie2Header)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2); 36085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho printf("**UTrie2Lengths(%s)** index:%6ld data:%6ld serialized:%6ld\n", 36185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho which, indexLength, dataLength, totalLength); 36285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 36385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#endif 36485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 36585bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI UTrie2 * U_EXPORT2 36685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houtrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode) { 36785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho NewTrieAndStatus context; 36885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UChar lead; 36985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 37085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(U_FAILURE(*pErrorCode)) { 37185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return NULL; 37285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 37385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) { 37485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 37585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return NULL; 37685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 37785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(other->newTrie!=NULL && !other->newTrie->isCompacted) { 37885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return utrie2_clone(other, pErrorCode); /* clone an unfrozen trie */ 37985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 38085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 38185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* Clone the frozen trie by enumerating it and building a new one. */ 38285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho context.trie=utrie2_open(other->initialValue, other->errorValue, pErrorCode); 38385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(U_FAILURE(*pErrorCode)) { 38485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return NULL; 38585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 38685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho context.exclusiveLimit=FALSE; 38785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho context.errorCode=*pErrorCode; 38885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho utrie2_enum(other, NULL, copyEnumRange, &context); 38985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=context.errorCode; 39085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(lead=0xd800; lead<0xdc00; ++lead) { 39185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t value; 39285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(other->data32==NULL) { 39385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho value=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(other, lead); 39485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 39585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho value=UTRIE2_GET32_FROM_U16_SINGLE_LEAD(other, lead); 39685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 39785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(value!=other->initialValue) { 39885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode); 39985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 40085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 40185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(U_FAILURE(*pErrorCode)) { 40285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho utrie2_close(context.trie); 40385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho context.trie=NULL; 40485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 40585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return context.trie; 40685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 40785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 40885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* Almost the same as utrie2_cloneAsThawed() but copies a UTrie and freezes the clone. */ 40985bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI UTrie2 * U_EXPORT2 41085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houtrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode) { 41185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho NewTrieAndStatus context; 41285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UChar lead; 41385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 41485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(U_FAILURE(*pErrorCode)) { 41585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return NULL; 41685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 41785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(trie1==NULL) { 41885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 41985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return NULL; 42085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 42185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho context.trie=utrie2_open(trie1->initialValue, errorValue, pErrorCode); 42285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(U_FAILURE(*pErrorCode)) { 42385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return NULL; 42485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 42585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho context.exclusiveLimit=TRUE; 42685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho context.errorCode=*pErrorCode; 42785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho utrie_enum(trie1, NULL, copyEnumRange, &context); 42885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=context.errorCode; 42985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(lead=0xd800; lead<0xdc00; ++lead) { 43085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t value; 43185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(trie1->data32==NULL) { 43285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho value=UTRIE_GET16_FROM_LEAD(trie1, lead); 43385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 43485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho value=UTRIE_GET32_FROM_LEAD(trie1, lead); 43585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 43685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(value!=trie1->initialValue) { 43785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode); 43885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 43985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 44085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(U_SUCCESS(*pErrorCode)) { 44185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho utrie2_freeze(context.trie, 44285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie1->data32!=NULL ? UTRIE2_32_VALUE_BITS : UTRIE2_16_VALUE_BITS, 44385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho pErrorCode); 44485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 44585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#ifdef UTRIE2_DEBUG 44685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(U_SUCCESS(*pErrorCode)) { 44785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho utrie_printLengths(trie1); 44885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho utrie2_printLengths(context.trie, "fromUTrie"); 44985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 45085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#endif 45185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(U_FAILURE(*pErrorCode)) { 45285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho utrie2_close(context.trie); 45385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho context.trie=NULL; 45485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 45585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return context.trie; 45685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 45785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 45885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic U_INLINE UBool 45985bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoisInNullBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) { 46085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t i2, block; 46185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 46285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(U_IS_LEAD(c) && forLSCP) { 46385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+ 46485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho (c>>UTRIE2_SHIFT_2); 46585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 46685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho i2=trie->index1[c>>UTRIE2_SHIFT_1]+ 46785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK); 46885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 46985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho block=trie->index2[i2]; 47085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return (UBool)(block==trie->dataNullOffset); 47185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 47285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 47385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic int32_t 47485bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoallocIndex2Block(UNewTrie2 *trie) { 47585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t newBlock, newTop; 47685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 47785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newBlock=trie->index2Length; 47885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTop=newBlock+UTRIE2_INDEX_2_BLOCK_LENGTH; 47985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(newTop>LENGTHOF(trie->index2)) { 48085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 48185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Should never occur. 48285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Either UTRIE2_MAX_BUILD_TIME_INDEX_LENGTH is incorrect, 48385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * or the code writes more values than should be possible. 48485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 48585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return -1; 48685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 48785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->index2Length=newTop; 48885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_memcpy(trie->index2+newBlock, trie->index2+trie->index2NullOffset, UTRIE2_INDEX_2_BLOCK_LENGTH*4); 48985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return newBlock; 49085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 49185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 49285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic int32_t 49385bf2e2fbc60a9f938064abc8127d61da7d19882Claire HogetIndex2Block(UNewTrie2 *trie, UChar32 c, UBool forLSCP) { 49485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t i1, i2; 49585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 49685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(U_IS_LEAD(c) && forLSCP) { 49785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return UTRIE2_LSCP_INDEX_2_OFFSET; 49885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 49985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 50085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho i1=c>>UTRIE2_SHIFT_1; 50185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho i2=trie->index1[i1]; 50285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(i2==trie->index2NullOffset) { 50385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho i2=allocIndex2Block(trie); 50485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(i2<0) { 50585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return -1; /* program error */ 50685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 50785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->index1[i1]=i2; 50885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 50985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return i2; 51085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 51185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 51285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic int32_t 51385bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoallocDataBlock(UNewTrie2 *trie, int32_t copyBlock) { 51485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t newBlock, newTop; 51585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 51685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(trie->firstFreeBlock!=0) { 51785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* get the first free block */ 51885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newBlock=trie->firstFreeBlock; 51985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->firstFreeBlock=-trie->map[newBlock>>UTRIE2_SHIFT_2]; 52085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 52185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* get a new block from the high end */ 52285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newBlock=trie->dataLength; 52385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTop=newBlock+UTRIE2_DATA_BLOCK_LENGTH; 52485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(newTop>trie->dataCapacity) { 52585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* out of memory in the data array */ 52685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t capacity; 52785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t *data; 52885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 52985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(trie->dataCapacity<UNEWTRIE2_MEDIUM_DATA_LENGTH) { 53085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho capacity=UNEWTRIE2_MEDIUM_DATA_LENGTH; 53185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else if(trie->dataCapacity<UNEWTRIE2_MAX_DATA_LENGTH) { 53285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho capacity=UNEWTRIE2_MAX_DATA_LENGTH; 53385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 53485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 53585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Should never occur. 53685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Either UNEWTRIE2_MAX_DATA_LENGTH is incorrect, 53785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * or the code writes more values than should be possible. 53885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 53985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return -1; 54085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 54185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho data=(uint32_t *)uprv_malloc(capacity*4); 54285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(data==NULL) { 54385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return -1; 54485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 54585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_memcpy(data, trie->data, trie->dataLength*4); 54685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_free(trie->data); 54785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->data=data; 54885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->dataCapacity=capacity; 54985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 55085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->dataLength=newTop; 55185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 55285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_memcpy(trie->data+newBlock, trie->data+copyBlock, UTRIE2_DATA_BLOCK_LENGTH*4); 55385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->map[newBlock>>UTRIE2_SHIFT_2]=0; 55485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return newBlock; 55585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 55685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 55785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* call when the block's reference counter reaches 0 */ 55885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void 55985bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoreleaseDataBlock(UNewTrie2 *trie, int32_t block) { 56085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* put this block at the front of the free-block chain */ 56185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->map[block>>UTRIE2_SHIFT_2]=-trie->firstFreeBlock; 56285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->firstFreeBlock=block; 56385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 56485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 56585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic U_INLINE UBool 56685bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoisWritableBlock(UNewTrie2 *trie, int32_t block) { 56785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return (UBool)(block!=trie->dataNullOffset && 1==trie->map[block>>UTRIE2_SHIFT_2]); 56885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 56985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 57085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic U_INLINE void 57185bf2e2fbc60a9f938064abc8127d61da7d19882Claire HosetIndex2Entry(UNewTrie2 *trie, int32_t i2, int32_t block) { 57285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t oldBlock; 57385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ++trie->map[block>>UTRIE2_SHIFT_2]; /* increment first, in case block==oldBlock! */ 57485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho oldBlock=trie->index2[i2]; 57585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(0 == --trie->map[oldBlock>>UTRIE2_SHIFT_2]) { 57685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho releaseDataBlock(trie, oldBlock); 57785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 57885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->index2[i2]=block; 57985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 58085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 58185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/** 58285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * No error checking for illegal arguments. 58385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 58485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * @return -1 if no new data block available (out of memory in data array) 58585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * @internal 58685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 58785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic int32_t 58885bf2e2fbc60a9f938064abc8127d61da7d19882Claire HogetDataBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) { 58985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t i2, oldBlock, newBlock; 59085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 59185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho i2=getIndex2Block(trie, c, forLSCP); 59285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(i2<0) { 59385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return -1; /* program error */ 59485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 59585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 59685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho i2+=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; 59785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho oldBlock=trie->index2[i2]; 59885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(isWritableBlock(trie, oldBlock)) { 59985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return oldBlock; 60085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 60185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 60285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* allocate a new data block */ 60385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newBlock=allocDataBlock(trie, oldBlock); 60485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(newBlock<0) { 60585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* out of memory in the data array */ 60685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return -1; 60785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 60885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho setIndex2Entry(trie, i2, newBlock); 60985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return newBlock; 61085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 61185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 61285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/** 61385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * @return TRUE if the value was successfully set 61485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 61585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void 61685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hoset32(UNewTrie2 *trie, 61785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UChar32 c, UBool forLSCP, uint32_t value, 61885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UErrorCode *pErrorCode) { 61985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t block; 62085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 62185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(trie==NULL || trie->isCompacted) { 62285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_NO_WRITE_PERMISSION; 62385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 62485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 62585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 62685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho block=getDataBlock(trie, c, forLSCP); 62785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(block<0) { 62885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 62985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 63085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 63185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 63285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->data[block+(c&UTRIE2_DATA_MASK)]=value; 63385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 63485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 63585bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI void U_EXPORT2 63685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houtrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) { 63785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(U_FAILURE(*pErrorCode)) { 63885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 63985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 64085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if((uint32_t)c>0x10ffff) { 64185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 64285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 64385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 64485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho set32(trie->newTrie, c, TRUE, value, pErrorCode); 64585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 64685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 64785bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI void U_EXPORT2 64885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houtrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie, 64985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UChar32 c, uint32_t value, 65085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UErrorCode *pErrorCode) { 65185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(U_FAILURE(*pErrorCode)) { 65285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 65385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 65485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(!U_IS_LEAD(c)) { 65585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 65685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 65785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 65885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho set32(trie->newTrie, c, FALSE, value, pErrorCode); 65985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 66085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 66185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void 66285bf2e2fbc60a9f938064abc8127d61da7d19882Claire HowriteBlock(uint32_t *block, uint32_t value) { 66385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t *limit=block+UTRIE2_DATA_BLOCK_LENGTH; 66485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho while(block<limit) { 66585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *block++=value; 66685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 66785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 66885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 66985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/** 67085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * initialValue is ignored if overwrite=TRUE 67185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * @internal 67285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 67385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void 67485bf2e2fbc60a9f938064abc8127d61da7d19882Claire HofillBlock(uint32_t *block, UChar32 start, UChar32 limit, 67585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t value, uint32_t initialValue, UBool overwrite) { 67685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t *pLimit; 67785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 67885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho pLimit=block+limit; 67985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho block+=start; 68085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(overwrite) { 68185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho while(block<pLimit) { 68285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *block++=value; 68385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 68485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 68585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho while(block<pLimit) { 68685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(*block==initialValue) { 68785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *block=value; 68885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 68985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ++block; 69085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 69185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 69285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 69385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 69485bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI void U_EXPORT2 69585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houtrie2_setRange32(UTrie2 *trie, 69685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UChar32 start, UChar32 end, 69785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t value, UBool overwrite, 69885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UErrorCode *pErrorCode) { 69985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 70085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * repeat value in [start..end] 70185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * mark index values for repeat-data blocks by setting bit 31 of the index values 70285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * fill around existing values if any, if(overwrite) 70385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 70485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UNewTrie2 *newTrie; 70585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t block, rest, repeatBlock; 70685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UChar32 limit; 70785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 70885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(U_FAILURE(*pErrorCode)) { 70985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 71085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 71185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if((uint32_t)start>0x10ffff || (uint32_t)end>0x10ffff || start>end) { 71285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 71385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 71485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 71585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie=trie->newTrie; 71685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(newTrie==NULL || newTrie->isCompacted) { 71785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_NO_WRITE_PERMISSION; 71885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 71985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 72085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(!overwrite && value==newTrie->initialValue) { 72185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; /* nothing to do */ 72285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 72385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 72485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho limit=end+1; 72585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(start&UTRIE2_DATA_MASK) { 72685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UChar32 nextStart; 72785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 72885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* set partial block at [start..following block boundary[ */ 72985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho block=getDataBlock(newTrie, start, TRUE); 73085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(block<0) { 73185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 73285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 73385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 73485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 73585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho nextStart=(start+UTRIE2_DATA_BLOCK_LENGTH)&~UTRIE2_DATA_MASK; 73685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(nextStart<=limit) { 73785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, UTRIE2_DATA_BLOCK_LENGTH, 73885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho value, newTrie->initialValue, overwrite); 73985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho start=nextStart; 74085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 74185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, limit&UTRIE2_DATA_MASK, 74285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho value, newTrie->initialValue, overwrite); 74385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 74485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 74585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 74685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 74785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* number of positions in the last, partial block */ 74885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho rest=limit&UTRIE2_DATA_MASK; 74985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 75085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* round down limit to a block boundary */ 75185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho limit&=~UTRIE2_DATA_MASK; 75285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 75385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* iterate over all-value blocks */ 75485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(value==newTrie->initialValue) { 75585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho repeatBlock=newTrie->dataNullOffset; 75685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 75785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho repeatBlock=-1; 75885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 75985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 76085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho while(start<limit) { 76185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t i2; 76285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UBool setRepeatBlock=FALSE; 76385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 76485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(value==newTrie->initialValue && isInNullBlock(newTrie, start, TRUE)) { 76585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho start+=UTRIE2_DATA_BLOCK_LENGTH; /* nothing to do */ 76685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho continue; 76785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 76885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 76985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* get index value */ 77085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho i2=getIndex2Block(newTrie, start, TRUE); 77185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(i2<0) { 77285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_INTERNAL_PROGRAM_ERROR; 77385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 77485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 77585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho i2+=(start>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; 77685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho block=newTrie->index2[i2]; 77785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(isWritableBlock(newTrie, block)) { 77885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* already allocated */ 77985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(overwrite && block>=UNEWTRIE2_DATA_0800_OFFSET) { 78085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 78185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * We overwrite all values, and it's not a 78285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * protected (ASCII-linear or 2-byte UTF-8) block: 78385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * replace with the repeatBlock. 78485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 78585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho setRepeatBlock=TRUE; 78685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 78785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* !overwrite, or protected block: just write the values into this block */ 78885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho fillBlock(newTrie->data+block, 78985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 0, UTRIE2_DATA_BLOCK_LENGTH, 79085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho value, newTrie->initialValue, overwrite); 79185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 79285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else if(newTrie->data[block]!=value && (overwrite || block==newTrie->dataNullOffset)) { 79385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 79485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Set the repeatBlock instead of the null block or previous repeat block: 79585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 79685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * If !isWritableBlock() then all entries in the block have the same value 79785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * because it's the null block or a range block (the repeatBlock from a previous 79885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * call to utrie2_setRange32()). 79985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * No other blocks are used multiple times before compacting. 80085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 80185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * The null block is the only non-writable block with the initialValue because 80285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * of the repeatBlock initialization above. (If value==initialValue, then 80385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * the repeatBlock will be the null data block.) 80485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 80585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * We set our repeatBlock if the desired value differs from the block's value, 80685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * and if we overwrite any data or if the data is all initial values 80785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * (which is the same as the block being the null block, see above). 80885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 80985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho setRepeatBlock=TRUE; 81085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 81185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(setRepeatBlock) { 81285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(repeatBlock>=0) { 81385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho setIndex2Entry(newTrie, i2, repeatBlock); 81485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 81585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* create and set and fill the repeatBlock */ 81685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho repeatBlock=getDataBlock(newTrie, start, TRUE); 81785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(repeatBlock<0) { 81885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 81985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 82085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 82185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho writeBlock(newTrie->data+repeatBlock, value); 82285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 82385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 82485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 82585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho start+=UTRIE2_DATA_BLOCK_LENGTH; 82685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 82785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 82885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(rest>0) { 82985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* set partial block at [last block boundary..limit[ */ 83085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho block=getDataBlock(newTrie, start, TRUE); 83185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(block<0) { 83285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 83385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 83485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 83585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 83685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho fillBlock(newTrie->data+block, 0, rest, value, newTrie->initialValue, overwrite); 83785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 83885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 83985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 84085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 84185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 84285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* compaction --------------------------------------------------------------- */ 84385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 84485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic U_INLINE UBool 84585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hoequal_int32(const int32_t *s, const int32_t *t, int32_t length) { 84685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho while(length>0 && *s==*t) { 84785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ++s; 84885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ++t; 84985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho --length; 85085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 85185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return (UBool)(length==0); 85285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 85385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 85485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic U_INLINE UBool 85585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hoequal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) { 85685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho while(length>0 && *s==*t) { 85785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ++s; 85885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ++t; 85985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho --length; 86085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 86185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return (UBool)(length==0); 86285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 86385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 86485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic int32_t 86585bf2e2fbc60a9f938064abc8127d61da7d19882Claire HofindSameIndex2Block(const int32_t *idx, int32_t index2Length, int32_t otherBlock) { 86685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t block; 86785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 86885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* ensure that we do not even partially get past index2Length */ 86985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho index2Length-=UTRIE2_INDEX_2_BLOCK_LENGTH; 87085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 87185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(block=0; block<=index2Length; ++block) { 87285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(equal_int32(idx+block, idx+otherBlock, UTRIE2_INDEX_2_BLOCK_LENGTH)) { 87385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return block; 87485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 87585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 87685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return -1; 87785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 87885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 87985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic int32_t 88085bf2e2fbc60a9f938064abc8127d61da7d19882Claire HofindSameDataBlock(const uint32_t *data, int32_t dataLength, int32_t otherBlock, int32_t blockLength) { 88185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t block; 88285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 88385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* ensure that we do not even partially get past dataLength */ 88485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho dataLength-=blockLength; 88585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 88685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(block=0; block<=dataLength; block+=UTRIE2_DATA_GRANULARITY) { 88785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(equal_uint32(data+block, data+otherBlock, blockLength)) { 88885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return block; 88985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 89085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 89185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return -1; 89285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 89385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 89485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* 89585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Find the start of the last range in the trie by enumerating backward. 89685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Indexes for supplementary code points higher than this will be omitted. 89785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 89885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic UChar32 89985bf2e2fbc60a9f938064abc8127d61da7d19882Claire HofindHighStart(UNewTrie2 *trie, uint32_t highValue) { 90085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho const uint32_t *data32; 90185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 90285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t value, initialValue; 90385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UChar32 c, prev; 90485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t i1, i2, j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock; 90585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 90685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho data32=trie->data; 90785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho initialValue=trie->initialValue; 90885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 90985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho index2NullOffset=trie->index2NullOffset; 91085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho nullBlock=trie->dataNullOffset; 91185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 91285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* set variables for previous range */ 91385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(highValue==initialValue) { 91485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho prevI2Block=index2NullOffset; 91585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho prevBlock=nullBlock; 91685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 91785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho prevI2Block=-1; 91885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho prevBlock=-1; 91985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 92085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho prev=0x110000; 92185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 92285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* enumerate index-2 blocks */ 92385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho i1=UNEWTRIE2_INDEX_1_LENGTH; 92485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho c=prev; 92585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho while(c>0) { 92685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho i2Block=trie->index1[--i1]; 92785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(i2Block==prevI2Block) { 92885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* the index-2 block is the same as the previous one, and filled with highValue */ 92985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho c-=UTRIE2_CP_PER_INDEX_1_ENTRY; 93085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho continue; 93185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 93285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho prevI2Block=i2Block; 93385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(i2Block==index2NullOffset) { 93485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* this is the null index-2 block */ 93585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(highValue!=initialValue) { 93685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return c; 93785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 93885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho c-=UTRIE2_CP_PER_INDEX_1_ENTRY; 93985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 94085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* enumerate data blocks for one index-2 block */ 94185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(i2=UTRIE2_INDEX_2_BLOCK_LENGTH; i2>0;) { 94285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho block=trie->index2[i2Block+ --i2]; 94385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(block==prevBlock) { 94485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* the block is the same as the previous one, and filled with highValue */ 94585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho c-=UTRIE2_DATA_BLOCK_LENGTH; 94685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho continue; 94785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 94885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho prevBlock=block; 94985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(block==nullBlock) { 95085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* this is the null data block */ 95185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(highValue!=initialValue) { 95285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return c; 95385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 95485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho c-=UTRIE2_DATA_BLOCK_LENGTH; 95585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 95685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(j=UTRIE2_DATA_BLOCK_LENGTH; j>0;) { 95785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho value=data32[block+ --j]; 95885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(value!=highValue) { 95985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return c; 96085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 96185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho --c; 96285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 96385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 96485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 96585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 96685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 96785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 96885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* deliver last range */ 96985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return 0; 97085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 97185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 97285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* 97385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Compact a build-time trie. 97485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 97585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * The compaction 97685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - removes blocks that are identical with earlier ones 97785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - overlaps adjacent blocks as much as possible (if overlap==TRUE) 97885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - moves blocks in steps of the data granularity 97985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - moves and overlaps blocks that overlap with multiple values in the overlap region 98085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * 98185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * It does not 98285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * - try to move and overlap blocks that are not already adjacent 98385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 98485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void 98585bf2e2fbc60a9f938064abc8127d61da7d19882Claire HocompactData(UNewTrie2 *trie) { 98685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t start, newStart, movedStart; 98785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t blockLength, overlap; 98885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t i, mapIndex, blockCount; 98985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 99085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* do not compact linear-ASCII data */ 99185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newStart=UTRIE2_DATA_START_OFFSET; 99285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(start=0, i=0; start<newStart; start+=UTRIE2_DATA_BLOCK_LENGTH, ++i) { 99385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->map[i]=start; 99485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 99585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 99685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 99785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Start with a block length of 64 for 2-byte UTF-8, 99885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * then switch to UTRIE2_DATA_BLOCK_LENGTH. 99985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 100085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho blockLength=64; 100185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho blockCount=blockLength>>UTRIE2_SHIFT_2; 100285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(start=newStart; start<trie->dataLength;) { 100385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 100485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * start: index of first entry of current block 100585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * newStart: index where the current block is to be moved 100685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * (right after current end of already-compacted data) 100785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 100885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(start==UNEWTRIE2_DATA_0800_OFFSET) { 100985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho blockLength=UTRIE2_DATA_BLOCK_LENGTH; 101085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho blockCount=1; 101185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 101285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 101385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* skip blocks that are not used */ 101485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(trie->map[start>>UTRIE2_SHIFT_2]<=0) { 101585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* advance start to the next block */ 101685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho start+=blockLength; 101785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 101885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* leave newStart with the previous block! */ 101985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho continue; 102085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 102185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 102285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* search for an identical block */ 102385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if( (movedStart=findSameDataBlock(trie->data, newStart, start, blockLength)) 102485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho >=0 102585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ) { 102685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* found an identical block, set the other block's index value for the current block */ 102785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { 102885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->map[mapIndex++]=movedStart; 102985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho movedStart+=UTRIE2_DATA_BLOCK_LENGTH; 103085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 103185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 103285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* advance start to the next block */ 103385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho start+=blockLength; 103485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 103585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* leave newStart with the previous block! */ 103685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho continue; 103785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 103885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 103985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* see if the beginning of this block can be overlapped with the end of the previous block */ 104085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* look for maximum overlap (modulo granularity) with the previous, adjacent block */ 104185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(overlap=blockLength-UTRIE2_DATA_GRANULARITY; 104285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho overlap>0 && !equal_uint32(trie->data+(newStart-overlap), trie->data+start, overlap); 104385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho overlap-=UTRIE2_DATA_GRANULARITY) {} 104485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 104585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(overlap>0 || newStart<start) { 104685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* some overlap, or just move the whole block */ 104785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho movedStart=newStart-overlap; 104885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { 104985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->map[mapIndex++]=movedStart; 105085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho movedStart+=UTRIE2_DATA_BLOCK_LENGTH; 105185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 105285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 105385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* move the non-overlapping indexes to their new positions */ 105485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho start+=overlap; 105585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(i=blockLength-overlap; i>0; --i) { 105685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->data[newStart++]=trie->data[start++]; 105785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 105885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else /* no overlap && newStart==start */ { 105985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { 106085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->map[mapIndex++]=start; 106185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho start+=UTRIE2_DATA_BLOCK_LENGTH; 106285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 106385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newStart=start; 106485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 106585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 106685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 106785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* now adjust the index-2 table */ 106885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(i=0; i<trie->index2Length; ++i) { 106985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(i==UNEWTRIE2_INDEX_GAP_OFFSET) { 107085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* Gap indexes are invalid (-1). Skip over the gap. */ 107185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho i+=UNEWTRIE2_INDEX_GAP_LENGTH; 107285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 107385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->index2[i]=trie->map[trie->index2[i]>>UTRIE2_SHIFT_2]; 107485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 107585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->dataNullOffset=trie->map[trie->dataNullOffset>>UTRIE2_SHIFT_2]; 107685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 107785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* ensure dataLength alignment */ 107885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho while((newStart&(UTRIE2_DATA_GRANULARITY-1))!=0) { 107985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->data[newStart++]=trie->initialValue; 108085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 108185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 108285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#ifdef UTRIE2_DEBUG 108385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* we saved some space */ 108485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho printf("compacting UTrie2: count of 32-bit data words %lu->%lu\n", 108585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho (long)trie->dataLength, (long)newStart); 108685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#endif 108785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 108885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->dataLength=newStart; 108985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 109085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 109185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void 109285bf2e2fbc60a9f938064abc8127d61da7d19882Claire HocompactIndex2(UNewTrie2 *trie) { 109385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t i, start, newStart, movedStart, overlap; 109485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 109585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* do not compact linear-BMP index-2 blocks */ 109685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newStart=UTRIE2_INDEX_2_BMP_LENGTH; 109785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(start=0, i=0; start<newStart; start+=UTRIE2_INDEX_2_BLOCK_LENGTH, ++i) { 109885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->map[i]=start; 109985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 110085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 110185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* Reduce the index table gap to what will be needed at runtime. */ 110285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newStart+=UTRIE2_UTF8_2B_INDEX_2_LENGTH+((trie->highStart-0x10000)>>UTRIE2_SHIFT_1); 110385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 110485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(start=UNEWTRIE2_INDEX_2_NULL_OFFSET; start<trie->index2Length;) { 110585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 110685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * start: index of first entry of current block 110785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * newStart: index where the current block is to be moved 110885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * (right after current end of already-compacted data) 110985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 111085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 111185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* search for an identical block */ 111285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if( (movedStart=findSameIndex2Block(trie->index2, newStart, start)) 111385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho >=0 111485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ) { 111585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* found an identical block, set the other block's index value for the current block */ 111685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->map[start>>UTRIE2_SHIFT_1_2]=movedStart; 111785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 111885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* advance start to the next block */ 111985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho start+=UTRIE2_INDEX_2_BLOCK_LENGTH; 112085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 112185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* leave newStart with the previous block! */ 112285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho continue; 112385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 112485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 112585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* see if the beginning of this block can be overlapped with the end of the previous block */ 112685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* look for maximum overlap with the previous, adjacent block */ 112785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(overlap=UTRIE2_INDEX_2_BLOCK_LENGTH-1; 112885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho overlap>0 && !equal_int32(trie->index2+(newStart-overlap), trie->index2+start, overlap); 112985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho --overlap) {} 113085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 113185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(overlap>0 || newStart<start) { 113285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* some overlap, or just move the whole block */ 113385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->map[start>>UTRIE2_SHIFT_1_2]=newStart-overlap; 113485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 113585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* move the non-overlapping indexes to their new positions */ 113685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho start+=overlap; 113785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(i=UTRIE2_INDEX_2_BLOCK_LENGTH-overlap; i>0; --i) { 113885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->index2[newStart++]=trie->index2[start++]; 113985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 114085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else /* no overlap && newStart==start */ { 114185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->map[start>>UTRIE2_SHIFT_1_2]=start; 114285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho start+=UTRIE2_INDEX_2_BLOCK_LENGTH; 114385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newStart=start; 114485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 114585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 114685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 114785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* now adjust the index-1 table */ 114885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(i=0; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) { 114985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->index1[i]=trie->map[trie->index1[i]>>UTRIE2_SHIFT_1_2]; 115085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 115185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->index2NullOffset=trie->map[trie->index2NullOffset>>UTRIE2_SHIFT_1_2]; 115285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 115385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 115485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Ensure data table alignment: 115585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Needs to be granularity-aligned for 16-bit trie 115685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * (so that dataMove will be down-shiftable), 115785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * and 2-aligned for uint32_t data. 115885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 115985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho while((newStart&((UTRIE2_DATA_GRANULARITY-1)|1))!=0) { 116085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* Arbitrary value: 0x3fffc not possible for real data. */ 116185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->index2[newStart++]=(int32_t)0xffff<<UTRIE2_INDEX_SHIFT; 116285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 116385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 116485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#ifdef UTRIE2_DEBUG 116585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* we saved some space */ 116685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho printf("compacting UTrie2: count of 16-bit index-2 words %lu->%lu\n", 116785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho (long)trie->index2Length, (long)newStart); 116885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#endif 116985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 117085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->index2Length=newStart; 117185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 117285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 117385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Hostatic void 117485bf2e2fbc60a9f938064abc8127d61da7d19882Claire HocompactTrie(UTrie2 *trie, UErrorCode *pErrorCode) { 117585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UNewTrie2 *newTrie; 117685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UChar32 highStart, suppHighStart; 117785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t highValue; 117885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 117985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie=trie->newTrie; 118085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 118185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* find highStart and round it up */ 118285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho highValue=utrie2_get32(trie, 0x10ffff); 118385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho highStart=findHighStart(newTrie, highValue); 118485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho highStart=(highStart+(UTRIE2_CP_PER_INDEX_1_ENTRY-1))&~(UTRIE2_CP_PER_INDEX_1_ENTRY-1); 118585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(highStart==0x110000) { 118685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho highValue=trie->errorValue; 118785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 118885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 118985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 119085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Set trie->highStart only after utrie2_get32(trie, highStart). 119185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Otherwise utrie2_get32(trie, highStart) would try to read the highValue. 119285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 119385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->highStart=newTrie->highStart=highStart; 119485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 119585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#ifdef UTRIE2_DEBUG 119685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho printf("UTrie2: highStart U+%04lx highValue 0x%lx initialValue 0x%lx\n", 119785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho (long)highStart, (long)highValue, (long)trie->initialValue); 119885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#endif 119985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 120085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(highStart<0x110000) { 120185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* Blank out [highStart..10ffff] to release associated data blocks. */ 120285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho suppHighStart= highStart<=0x10000 ? 0x10000 : highStart; 120385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho utrie2_setRange32(trie, suppHighStart, 0x10ffff, trie->initialValue, TRUE, pErrorCode); 120485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(U_FAILURE(*pErrorCode)) { 120585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 120685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 120785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 120885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 120985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho compactData(newTrie); 121085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(highStart>0x10000) { 121185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho compactIndex2(newTrie); 121285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#ifdef UTRIE2_DEBUG 121385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 121485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho printf("UTrie2: highStart U+%04lx count of 16-bit index-2 words %lu->%lu\n", 121585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho (long)highStart, (long)trie->newTrie->index2Length, (long)UTRIE2_INDEX_1_OFFSET); 121685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#endif 121785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 121885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 121985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 122085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Store the highValue in the data array and round up the dataLength. 122185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Must be done after compactData() because that assumes that dataLength 122285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * is a multiple of UTRIE2_DATA_BLOCK_LENGTH. 122385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 122485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->data[newTrie->dataLength++]=highValue; 122585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho while((newTrie->dataLength&(UTRIE2_DATA_GRANULARITY-1))!=0) { 122685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->data[newTrie->dataLength++]=trie->initialValue; 122785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 122885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 122985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie->isCompacted=TRUE; 123085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 123185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 123285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* serialization ------------------------------------------------------------ */ 123385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 123485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/** 123585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Maximum length of the runtime index array. 123685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Limited by its own 16-bit index values, and by uint16_t UTrie2Header.indexLength. 123785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * (The actual maximum length is lower, 123885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * (0x110000>>UTRIE2_SHIFT_2)+UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH.) 123985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 124085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#define UTRIE2_MAX_INDEX_LENGTH 0xffff 124185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 124285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/** 124385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Maximum length of the runtime data array. 124485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * Limited by 16-bit index values that are left-shifted by UTRIE2_INDEX_SHIFT, 124585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * and by uint16_t UTrie2Header.shiftedDataLength. 124685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 124785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#define UTRIE2_MAX_DATA_LENGTH (0xffff<<UTRIE2_INDEX_SHIFT) 124885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 124985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho/* Compact and internally serialize the trie. */ 125085bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI void U_EXPORT2 125185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houtrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode) { 125285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UNewTrie2 *newTrie; 125385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UTrie2Header *header; 125485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t *p; 125585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint16_t *dest16; 125685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t i, length; 125785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t allIndexesLength; 125885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t dataMove; /* >0 if the data is moved to the end of the index array */ 125985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UChar32 highStart; 126085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 126185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* argument check */ 126285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(U_FAILURE(*pErrorCode)) { 126385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 126485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 126585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if( trie==NULL || 126685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits 126785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ) { 126885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 126985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 127085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 127185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho newTrie=trie->newTrie; 127285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(newTrie==NULL) { 127385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* already frozen */ 127485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UTrie2ValueBits frozenValueBits= 127585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->data16!=NULL ? UTRIE2_16_VALUE_BITS : UTRIE2_32_VALUE_BITS; 127685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(valueBits!=frozenValueBits) { 127785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 127885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 127985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 128085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 128185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 128285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* compact if necessary */ 128385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(!newTrie->isCompacted) { 128485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho compactTrie(trie, pErrorCode); 128585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(U_FAILURE(*pErrorCode)) { 128685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 128785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 128885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 128985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho highStart=trie->highStart; 129085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 129185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(highStart<=0x10000) { 129285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho allIndexesLength=UTRIE2_INDEX_1_OFFSET; 129385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 129485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho allIndexesLength=newTrie->index2Length; 129585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 129685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(valueBits==UTRIE2_16_VALUE_BITS) { 129785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho dataMove=allIndexesLength; 129885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 129985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho dataMove=0; 130085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 130185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 130285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* are indexLength and dataLength within limits? */ 130385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if( /* for unshifted indexLength */ 130485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho allIndexesLength>UTRIE2_MAX_INDEX_LENGTH || 130585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* for unshifted dataNullOffset */ 130685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho (dataMove+newTrie->dataNullOffset)>0xffff || 130785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* for unshifted 2-byte UTF-8 index-2 values */ 130885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho (dataMove+UNEWTRIE2_DATA_0800_OFFSET)>0xffff || 130985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* for shiftedDataLength */ 131085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho (dataMove+newTrie->dataLength)>UTRIE2_MAX_DATA_LENGTH 131185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ) { 131285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 131385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 131485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 131585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 131685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* calculate the total serialized length */ 131785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho length=sizeof(UTrie2Header)+allIndexesLength*2; 131885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(valueBits==UTRIE2_16_VALUE_BITS) { 131985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho length+=newTrie->dataLength*2; 132085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 132185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho length+=newTrie->dataLength*4; 132285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 132385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 132485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->memory=uprv_malloc(length); 132585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(trie->memory==NULL) { 132685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 132785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 132885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 132985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->length=length; 133085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->isMemoryOwned=TRUE; 133185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 133285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->indexLength=allIndexesLength; 133385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->dataLength=newTrie->dataLength; 133485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(highStart<=0x10000) { 133585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->index2NullOffset=0xffff; 133685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 133785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET+newTrie->index2NullOffset; 133885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 133985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->dataNullOffset=(uint16_t)(dataMove+newTrie->dataNullOffset); 134085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->highValueIndex=dataMove+trie->dataLength-UTRIE2_DATA_GRANULARITY; 134185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 134285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* set the header fields */ 134385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho header=(UTrie2Header *)trie->memory; 134485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 134585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho header->signature=UTRIE2_SIG; /* "Tri2" */ 134685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho header->options=(uint16_t)valueBits; 134785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 134885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho header->indexLength=(uint16_t)trie->indexLength; 134985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho header->shiftedDataLength=(uint16_t)(trie->dataLength>>UTRIE2_INDEX_SHIFT); 135085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho header->index2NullOffset=trie->index2NullOffset; 135185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho header->dataNullOffset=trie->dataNullOffset; 135285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho header->shiftedHighStart=(uint16_t)(highStart>>UTRIE2_SHIFT_1); 135385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 135485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* fill the index and data arrays */ 135585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho dest16=(uint16_t *)(header+1); 135685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->index=dest16; 135785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 135885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove */ 135985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho p=(uint32_t *)newTrie->index2; 136085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(i=UTRIE2_INDEX_2_BMP_LENGTH; i>0; --i) { 136185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT); 136285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 136385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 136485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* write UTF-8 2-byte index-2 values, not right-shifted */ 136585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */ 136685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET); 136785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 136885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(; i<(0xe0-0xc0); ++i) { /* C2..DF */ 136985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *dest16++=(uint16_t)(dataMove+newTrie->index2[i<<(6-UTRIE2_SHIFT_2)]); 137085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 137185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 137285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(highStart>0x10000) { 137385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t index1Length=(highStart-0x10000)>>UTRIE2_SHIFT_1; 137485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t index2Offset=UTRIE2_INDEX_2_BMP_LENGTH+UTRIE2_UTF8_2B_INDEX_2_LENGTH+index1Length; 137585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 137685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* write 16-bit index-1 values for supplementary code points */ 137785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho p=(uint32_t *)newTrie->index1+UTRIE2_OMITTED_BMP_INDEX_1_LENGTH; 137885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(i=index1Length; i>0; --i) { 137985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *dest16++=(uint16_t)(UTRIE2_INDEX_2_OFFSET + *p++); 138085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 138185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 138285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* 138385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * write the index-2 array values for supplementary code points, 138485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho * shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove 138585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho */ 138685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho p=(uint32_t *)newTrie->index2+index2Offset; 138785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(i=newTrie->index2Length-index2Offset; i>0; --i) { 138885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT); 138985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 139085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 139185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 139285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* write the 16/32-bit data array */ 139385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho switch(valueBits) { 139485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho case UTRIE2_16_VALUE_BITS: 139585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* write 16-bit data values */ 139685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->data16=dest16; 139785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->data32=NULL; 139885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho p=newTrie->data; 139985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(i=newTrie->dataLength; i>0; --i) { 140085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *dest16++=(uint16_t)*p++; 140185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 140285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho break; 140385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho case UTRIE2_32_VALUE_BITS: 140485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* write 32-bit data values */ 140585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->data16=NULL; 140685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->data32=(uint32_t *)dest16; 140785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_memcpy(dest16, newTrie->data, newTrie->dataLength*4); 140885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho break; 140985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho default: 141085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 141185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 141285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 141385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 141485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* Delete the UNewTrie2. */ 141585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_free(newTrie->data); 141685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_free(newTrie); 141785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie->newTrie=NULL; 141885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 141985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 142085bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI UBool U_EXPORT2 142185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houtrie2_isFrozen(const UTrie2 *trie) { 142285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return (UBool)(trie->newTrie==NULL); 142385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 142485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 142585bf2e2fbc60a9f938064abc8127d61da7d19882Claire HoU_CAPI int32_t U_EXPORT2 142685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Houtrie2_serialize(UTrie2 *trie, 142785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho void *data, int32_t capacity, 142885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UErrorCode *pErrorCode) { 142985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* argument check */ 143085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(U_FAILURE(*pErrorCode)) { 143185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return 0; 143285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 143385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 143485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if( trie==NULL || trie->memory==NULL || trie->newTrie!=NULL || 143585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho capacity<0 || (capacity>0 && (data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0))) 143685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ) { 143785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 143885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return 0; 143985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 144085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 144185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(capacity>=trie->length) { 144285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_memcpy(data, trie->memory, trie->length); 144385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 144485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 144585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 144685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return trie->length; 144785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho} 144827f654740f2a26ad62a5c155af9199af9e69b889claireho 144927f654740f2a26ad62a5c155af9199af9e69b889claireho/* 145027f654740f2a26ad62a5c155af9199af9e69b889claireho * This is here to avoid a dependency from utrie2.cpp on utrie.c. 145127f654740f2a26ad62a5c155af9199af9e69b889claireho * This file already depends on utrie.c. 145227f654740f2a26ad62a5c155af9199af9e69b889claireho * Otherwise, this should be in utrie2.cpp right after utrie2_swap(). 145327f654740f2a26ad62a5c155af9199af9e69b889claireho */ 145427f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CAPI int32_t U_EXPORT2 145527f654740f2a26ad62a5c155af9199af9e69b889clairehoutrie2_swapAnyVersion(const UDataSwapper *ds, 145627f654740f2a26ad62a5c155af9199af9e69b889claireho const void *inData, int32_t length, void *outData, 145727f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode *pErrorCode) { 145827f654740f2a26ad62a5c155af9199af9e69b889claireho if(U_SUCCESS(*pErrorCode)) { 145927f654740f2a26ad62a5c155af9199af9e69b889claireho switch(utrie2_getVersion(inData, length, TRUE)) { 146027f654740f2a26ad62a5c155af9199af9e69b889claireho case 1: 146127f654740f2a26ad62a5c155af9199af9e69b889claireho return utrie_swap(ds, inData, length, outData, pErrorCode); 146227f654740f2a26ad62a5c155af9199af9e69b889claireho case 2: 146327f654740f2a26ad62a5c155af9199af9e69b889claireho return utrie2_swap(ds, inData, length, outData, pErrorCode); 146427f654740f2a26ad62a5c155af9199af9e69b889claireho default: 146527f654740f2a26ad62a5c155af9199af9e69b889claireho *pErrorCode=U_INVALID_FORMAT_ERROR; 146627f654740f2a26ad62a5c155af9199af9e69b889claireho return 0; 146727f654740f2a26ad62a5c155af9199af9e69b889claireho } 146827f654740f2a26ad62a5c155af9199af9e69b889claireho } 146927f654740f2a26ad62a5c155af9199af9e69b889claireho return 0; 147027f654740f2a26ad62a5c155af9199af9e69b889claireho} 1471