/*
*******************************************************************************
*
*   Copyright (C) 2001-2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  ucaelems.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created 02/22/2001
*   created by: Vladimir Weinstein
*
*   This program reads the Fractional UCA table and generates
*   internal format for UCA table as well as inverse UCA table.
18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* It then writes binary files containing the data: ucadata.dat 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* & invuca.dat 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* date name comments 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 03/02/2001 synwee added setMaxExpansion 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 03/07/2001 synwee merged UCA's maxexpansion and tailoring's 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_COLLATION 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h" 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/unistr.h" 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucoleitr.h" 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/normlzr.h" 3450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "normalizer2impl.h" 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucol_elm.h" 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucol_tok.h" 37c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#include "ucol_cnt.h" 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/caniter.h" 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_USE 
4250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic uint32_t uprv_uca_processContraction(CntTable *contractions, UCAElements *element, uint32_t existingCE, UErrorCode *status); 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_BEGIN 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t U_CALLCONV 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruprefixLookupHash(const UHashTok e) { 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCAElements *element = (UCAElements *)e.pointer; 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar buf[256]; 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UHashTok key; 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru key.pointer = buf; 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(buf, element->cPoints, element->cSize*sizeof(UChar)); 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf[element->cSize] = 0; 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //key.pointer = element->cPoints; 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //element->cPoints[element->cSize] = 0; 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return uhash_hashUChars(key); 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int8_t U_CALLCONV 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruprefixLookupComp(const UHashTok e1, const UHashTok e2) { 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCAElements *element1 = (UCAElements *)e1.pointer; 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCAElements *element2 = (UCAElements *)e2.pointer; 
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar buf1[256]; 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UHashTok key1; 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru key1.pointer = buf1; 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(buf1, element1->cPoints, element1->cSize*sizeof(UChar)); 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf1[element1->cSize] = 0; 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar buf2[256]; 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UHashTok key2; 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru key2.pointer = buf2; 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(buf2, element2->cPoints, element2->cSize*sizeof(UChar)); 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf2[element2->cSize] = 0; 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return uhash_compareUChars(key1, key2); 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_END 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t uprv_uca_addExpansion(ExpansionTable *expansions, uint32_t value, UErrorCode *status) { 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status)) { 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(expansions->CEs == NULL) { 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expansions->CEs = (uint32_t 
*)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(uint32_t)); 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */ 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (expansions->CEs == NULL) { 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expansions->size = INIT_EXP_TABLE_SIZE; 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expansions->position = 0; 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(expansions->position == expansions->size) { 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *newData = (uint32_t *)uprv_realloc(expansions->CEs, 2*expansions->size*sizeof(uint32_t)); 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(newData == NULL) { 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCOL_DEBUG 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "out of memory for expansions\n"); 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expansions->CEs = newData; 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expansions->size *= 2; 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru expansions->CEs[expansions->position] = value; 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return(expansions->position++); 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI tempUCATable* U_EXPORT2 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollator *UCA, UColCETags initTag, UColCETags supplementaryInitTag, UErrorCode *status) { 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru MaxJamoExpansionTable *maxjet; 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru MaxExpansionTable *maxet; 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tempUCATable *t = (tempUCATable *)uprv_malloc(sizeof(tempUCATable)); 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */ 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t == NULL) { 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memset(t, 0, sizeof(tempUCATable)); 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxet = (MaxExpansionTable *)uprv_malloc(sizeof(MaxExpansionTable)); 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (maxet == NULL) { 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto allocation_failure; 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memset(maxet, 0, sizeof(MaxExpansionTable)); 
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->maxExpansions = maxet; 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxjet = (MaxJamoExpansionTable *)uprv_malloc(sizeof(MaxJamoExpansionTable)); 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (maxjet == NULL) { 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto allocation_failure; 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memset(maxjet, 0, sizeof(MaxJamoExpansionTable)); 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->maxJamoExpansions = maxjet; 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->image = image; 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->options = opts; 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->UCA = UCA; 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->expansions = (ExpansionTable *)uprv_malloc(sizeof(ExpansionTable)); 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */ 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t->expansions == NULL) { 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto allocation_failure; 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memset(t->expansions, 0, sizeof(ExpansionTable)); 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->mapping = utrie_open(NULL, NULL, UCOL_ELM_TRIE_CAPACITY, 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_SPECIAL_FLAG | (initTag<<24), 
151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCOL_SPECIAL_FLAG | (supplementaryInitTag << 24), 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TRUE); // Do your own mallocs for the structure, array and have linear Latin 1 153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(*status)) { 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto allocation_failure; 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->prefixLookup = uhash_open(prefixLookupHash, prefixLookupComp, NULL, status); 157c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(*status)) { 158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto allocation_failure; 159c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uhash_setValueDeleter(t->prefixLookup, uhash_freeBlock); 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->contractions = uprv_cnttab_open(t->mapping, status); 163c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(*status)) { 164c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup; 165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* copy UCA's maxexpansion and merge as we go along */ 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (UCA != NULL) { 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* adding an extra initial value for easier manipulation */ 17050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho maxet->size = (int32_t)(UCA->lastEndExpansionCE - UCA->endExpansionCE) + 2; 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
maxet->position = maxet->size - 1; 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxet->endExpansionCE = 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (uint32_t *)uprv_malloc(sizeof(uint32_t) * maxet->size); 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */ 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (maxet->endExpansionCE == NULL) { 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto allocation_failure; 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxet->expansionCESize = 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (uint8_t *)uprv_malloc(sizeof(uint8_t) * maxet->size); 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */ 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (maxet->expansionCESize == NULL) { 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto allocation_failure; 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* initialized value */ 185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(maxet->endExpansionCE) = 0; 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(maxet->expansionCESize) = 0; 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(maxet->endExpansionCE + 1, UCA->endExpansionCE, 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sizeof(uint32_t) * (maxet->size - 1)); 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(maxet->expansionCESize + 1, UCA->expansionCESize, 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sizeof(uint8_t) * (maxet->size - 1)); 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 
193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxet->size = 0; 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxjet->endExpansionCE = NULL; 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxjet->isV = NULL; 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxjet->size = 0; 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxjet->position = 0; 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxjet->maxLSize = 1; 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxjet->maxVSize = 1; 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxjet->maxTSize = 1; 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->unsafeCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE); 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */ 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t->unsafeCP == NULL) { 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto allocation_failure; 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->contrEndCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE); 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */ 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t->contrEndCP == NULL) { 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto allocation_failure; 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memset(t->unsafeCP, 0, UCOL_UNSAFECP_TABLE_SIZE); 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memset(t->contrEndCP, 0, UCOL_UNSAFECP_TABLE_SIZE); 
215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->cmLookup = NULL; 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return t; 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruallocation_failure: 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 220c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querucleanup: 221c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_uca_closeTempTable(t); 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic tempUCATable* U_EXPORT2 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_cloneTempTable(tempUCATable *t, UErrorCode *status) { 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status)) { 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tempUCATable *r = (tempUCATable *)uprv_malloc(sizeof(tempUCATable)); 232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */ 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (r == NULL) { 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memset(r, 0, sizeof(tempUCATable)); 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* mapping */ 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(t->mapping != NULL) { 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*r->mapping = ucmpe32_clone(t->mapping, status);*/ 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->mapping = utrie_clone(NULL, t->mapping, NULL, 0); 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // a hashing clone function would be very nice. We have none currently... 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // However, we should be good, as closing should not produce any prefixed elements. 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->prefixLookup = NULL; // prefixes are not used in closing 248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* expansions */ 250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(t->expansions != NULL) { 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->expansions = (ExpansionTable *)uprv_malloc(sizeof(ExpansionTable)); 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */ 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (r->expansions == NULL) { 254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 255c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup; 256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->expansions->position = t->expansions->position; 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->expansions->size = t->expansions->size; 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
if(t->expansions->CEs != NULL) { 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->expansions->CEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->expansions->size); 261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */ 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (r->expansions->CEs == NULL) { 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 264c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup; 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(r->expansions->CEs, t->expansions->CEs, sizeof(uint32_t)*t->expansions->position); 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->expansions->CEs = NULL; 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(t->contractions != NULL) { 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->contractions = uprv_cnttab_clone(t->contractions, status); 274c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Check for cloning failure. 
275c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (r->contractions == NULL) { 276c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 277c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup; 278c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->contractions->mapping = r->mapping; 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(t->maxExpansions != NULL) { 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->maxExpansions = (MaxExpansionTable *)uprv_malloc(sizeof(MaxExpansionTable)); 284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */ 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (r->maxExpansions == NULL) { 286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 287c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup; 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->maxExpansions->size = t->maxExpansions->size; 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->maxExpansions->position = t->maxExpansions->position; 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(t->maxExpansions->endExpansionCE != NULL) { 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->maxExpansions->endExpansionCE = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->maxExpansions->size); 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */ 294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (r->maxExpansions->endExpansionCE == NULL) { 295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = 
U_MEMORY_ALLOCATION_ERROR; 296c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup; 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 298c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memset(r->maxExpansions->endExpansionCE, 0xDB, sizeof(uint32_t)*t->maxExpansions->size); 299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(r->maxExpansions->endExpansionCE, t->maxExpansions->endExpansionCE, t->maxExpansions->position*sizeof(uint32_t)); 300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->maxExpansions->endExpansionCE = NULL; 302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(t->maxExpansions->expansionCESize != NULL) { 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->maxExpansions->expansionCESize = (uint8_t *)uprv_malloc(sizeof(uint8_t)*t->maxExpansions->size); 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */ 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (r->maxExpansions->expansionCESize == NULL) { 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup; 309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 310c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memset(r->maxExpansions->expansionCESize, 0xDB, sizeof(uint8_t)*t->maxExpansions->size); 311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(r->maxExpansions->expansionCESize, t->maxExpansions->expansionCESize, t->maxExpansions->position*sizeof(uint8_t)); 312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->maxExpansions->expansionCESize = NULL; 
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(t->maxJamoExpansions != NULL) { 318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->maxJamoExpansions = (MaxJamoExpansionTable *)uprv_malloc(sizeof(MaxJamoExpansionTable)); 319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */ 320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (r->maxJamoExpansions == NULL) { 321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 322c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup; 323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->maxJamoExpansions->size = t->maxJamoExpansions->size; 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->maxJamoExpansions->position = t->maxJamoExpansions->position; 326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->maxJamoExpansions->maxLSize = t->maxJamoExpansions->maxLSize; 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->maxJamoExpansions->maxVSize = t->maxJamoExpansions->maxVSize; 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->maxJamoExpansions->maxTSize = t->maxJamoExpansions->maxTSize; 329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(t->maxJamoExpansions->size != 0) { 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->maxJamoExpansions->endExpansionCE = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->maxJamoExpansions->size); 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */ 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (r->maxJamoExpansions->endExpansionCE == NULL) { 
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 334c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup; 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(r->maxJamoExpansions->endExpansionCE, t->maxJamoExpansions->endExpansionCE, t->maxJamoExpansions->position*sizeof(uint32_t)); 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->maxJamoExpansions->isV = (UBool *)uprv_malloc(sizeof(UBool)*t->maxJamoExpansions->size); 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */ 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (r->maxJamoExpansions->isV == NULL) { 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 341c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup; 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(r->maxJamoExpansions->isV, t->maxJamoExpansions->isV, t->maxJamoExpansions->position*sizeof(UBool)); 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->maxJamoExpansions->endExpansionCE = NULL; 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->maxJamoExpansions->isV = NULL; 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(t->unsafeCP != NULL) { 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->unsafeCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE); 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */ 
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (r->unsafeCP == NULL) { 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 355c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup; 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(r->unsafeCP, t->unsafeCP, UCOL_UNSAFECP_TABLE_SIZE); 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(t->contrEndCP != NULL) { 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->contrEndCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE); 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */ 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (r->contrEndCP == NULL) { 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 365c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto cleanup; 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(r->contrEndCP, t->contrEndCP, UCOL_UNSAFECP_TABLE_SIZE); 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->UCA = t->UCA; 371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->image = t->image; 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r->options = t->options; 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return r; 375c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querucleanup: 
376c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_uca_closeTempTable(t); 377c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return NULL; 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_closeTempTable(tempUCATable *t) { 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(t != NULL) { 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t->expansions != NULL) { 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(t->expansions->CEs); 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(t->expansions); 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(t->contractions != NULL) { 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_cnttab_close(t->contractions); 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t->mapping != NULL) { 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru utrie_close(t->mapping); 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(t->prefixLookup != NULL) { 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uhash_close(t->prefixLookup); 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t->maxExpansions != NULL) { 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
uprv_free(t->maxExpansions->endExpansionCE); 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(t->maxExpansions->expansionCESize); 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(t->maxExpansions); 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t->maxJamoExpansions->size > 0) { 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(t->maxJamoExpansions->endExpansionCE); 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(t->maxJamoExpansions->isV); 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(t->maxJamoExpansions); 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(t->unsafeCP); 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(t->contrEndCP); 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t->cmLookup != NULL) { 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(t->cmLookup->cPoints); 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(t->cmLookup); 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(t); 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Looks for the maximum length of all expansion 
sequences ending with the same 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* collation element. The size required for maxexpansion and maxsize is 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* returned if the arrays are too small. 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param endexpansion the last expansion collation element to be added 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param expansionsize size of the expansion 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param maxexpansion data structure to store the maximum expansion data. 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param status error status 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @returns size of the maxexpansion and maxsize used. 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int uprv_uca_setMaxExpansion(uint32_t endexpansion, 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t expansionsize, 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru MaxExpansionTable *maxexpansion, 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (maxexpansion->size == 0) { 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* we'll always make the first element 0, for easier manipulation */ 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxexpansion->endExpansionCE = 441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(int32_t)); 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */ 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (maxexpansion->endExpansionCE == NULL) 
{ 444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(maxexpansion->endExpansionCE) = 0; 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxexpansion->expansionCESize = 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (uint8_t *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(uint8_t)); 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */; 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (maxexpansion->expansionCESize == NULL) { 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(maxexpansion->expansionCESize) = 0; 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxexpansion->size = INIT_EXP_TABLE_SIZE; 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxexpansion->position = 0; 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (maxexpansion->position + 1 == maxexpansion->size) { 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *neweece = (uint32_t *)uprv_realloc(maxexpansion->endExpansionCE, 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2 * maxexpansion->size * sizeof(uint32_t)); 463c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (neweece == NULL) { 464c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 
465c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 466c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 467c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru maxexpansion->endExpansionCE = neweece; 468c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *neweces = (uint8_t *)uprv_realloc(maxexpansion->expansionCESize, 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2 * maxexpansion->size * sizeof(uint8_t)); 471c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (neweces == NULL) { 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 473c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxexpansion->expansionCESize = neweces; 476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxexpansion->size *= 2; 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *pendexpansionce = maxexpansion->endExpansionCE; 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *pexpansionsize = maxexpansion->expansionCESize; 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int pos = maxexpansion->position; 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *start = pendexpansionce; 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *limit = pendexpansionce + pos; 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 486c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* using binary search to determine if last expansion element is 
487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru already in the array */ 488c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t *mid; 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int result = -1; 490c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while (start < limit - 1) { 491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru mid = start + ((limit - start) >> 1); 492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (endexpansion <= *mid) { 493c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru limit = mid; 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru start = mid; 497c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 498c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 499c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 500c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (*start == endexpansion) { 50150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = (int)(start - pendexpansionce); 502c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 503c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else if (*limit == endexpansion) { 50450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho result = (int)(limit - pendexpansionce); 505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 507c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (result > -1) { 508c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* found the ce in expansion, we'll just modify the size if it is 509c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru smaller */ 510c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t *currentsize = pexpansionsize + result; 
511c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (*currentsize < expansionsize) { 512c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *currentsize = expansionsize; 513c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 514c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 515c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else { 516c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* we'll need to squeeze the value into the array. 517c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru initial implementation. */ 518c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* shifting the subarray down by 1 */ 51950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int shiftsize = (int)((pendexpansionce + pos) - start); 520c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t *shiftpos = start + 1; 521c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint8_t *sizeshiftpos = pexpansionsize + (shiftpos - pendexpansionce); 522c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 523c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* okay need to rearrange the array into sorted order */ 524c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (shiftsize == 0 /*|| *(pendexpansionce + pos) < endexpansion*/) { /* the commented part is actually both redundant and dangerous */ 525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(pendexpansionce + pos + 1) = endexpansion; 526c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *(pexpansionsize + pos + 1) = expansionsize; 527c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 528c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else { 529c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memmove(shiftpos + 1, shiftpos, shiftsize * sizeof(int32_t)); 530c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memmove(sizeshiftpos + 1, 
sizeshiftpos, 531c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru shiftsize * sizeof(uint8_t)); 532c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *shiftpos = endexpansion; 533c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *sizeshiftpos = expansionsize; 534c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 535c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru maxexpansion->position ++; 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCOL_DEBUG 538c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int temp; 539c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool found = FALSE; 540c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for (temp = 0; temp < maxexpansion->position; temp ++) { 541c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (pendexpansionce[temp] >= pendexpansionce[temp + 1]) { 542c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, "expansions %d\n", temp); 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pendexpansionce[temp] == endexpansion) { 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru found =TRUE; 546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pexpansionsize[temp] < expansionsize) { 547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "expansions size %d\n", temp); 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 551c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (pendexpansionce[temp] == endexpansion) { 552c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru found =TRUE; 553c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 
if (pexpansionsize[temp] < expansionsize) { 554c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, "expansions size %d\n", temp); 555c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 556c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 557c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (!found) 558c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, "expansion not found %d\n", temp); 559c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#endif 560c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 562c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return maxexpansion->position; 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Sets the maximum length of all jamo expansion sequences ending with the same 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* collation element. The size required for maxexpansion and maxsize is 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* returned if the arrays are too small. 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param ch the jamo codepoint 570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param endexpansion the last expansion collation element to be added 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param expansionsize size of the expansion 572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param maxexpansion data structure to store the maximum expansion data. 
573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param status error status 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @returns size of the maxexpansion and maxsize used. 575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int uprv_uca_setMaxJamoExpansion(UChar ch, 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t endexpansion, 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t expansionsize, 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru MaxJamoExpansionTable *maxexpansion, 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) 581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isV = TRUE; 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (((uint32_t)ch - 0x1100) <= (0x1112 - 0x1100)) { 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* determines L for Jamo, doesn't need to store this since it is never 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru at the end of a expansion */ 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (maxexpansion->maxLSize < expansionsize) { 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxexpansion->maxLSize = expansionsize; 588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return maxexpansion->position; 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (((uint32_t)ch - 0x1161) <= (0x1175 - 0x1161)) { 593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* determines V for Jamo */ 594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if 
(maxexpansion->maxVSize < expansionsize) { 595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxexpansion->maxVSize = expansionsize; 596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (((uint32_t)ch - 0x11A8) <= (0x11C2 - 0x11A8)) { 600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isV = FALSE; 601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* determines T for Jamo */ 602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (maxexpansion->maxTSize < expansionsize) { 603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxexpansion->maxTSize = expansionsize; 604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (maxexpansion->size == 0) { 608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* we'll always make the first element 0, for easier manipulation */ 609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxexpansion->endExpansionCE = 610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(uint32_t)); 611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */; 612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (maxexpansion->endExpansionCE == NULL) { 613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(maxexpansion->endExpansionCE) = 
0; 617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxexpansion->isV = 618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (UBool *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(UBool)); 619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */; 620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (maxexpansion->isV == NULL) { 621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(maxexpansion->endExpansionCE); 623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxexpansion->endExpansionCE = NULL; 624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(maxexpansion->isV) = 0; 627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxexpansion->size = INIT_EXP_TABLE_SIZE; 628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxexpansion->position = 0; 629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (maxexpansion->position + 1 == maxexpansion->size) { 632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxexpansion->size *= 2; 633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxexpansion->endExpansionCE = (uint32_t *)uprv_realloc(maxexpansion->endExpansionCE, 634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxexpansion->size * sizeof(uint32_t)); 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (maxexpansion->endExpansionCE == NULL) { 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCOL_DEBUG 637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "out of memory for 
maxExpansions\n"); 638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxexpansion->isV = (UBool *)uprv_realloc(maxexpansion->isV, 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxexpansion->size * sizeof(UBool)); 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (maxexpansion->isV == NULL) { 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCOL_DEBUG 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "out of memory for maxExpansions\n"); 647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(maxexpansion->endExpansionCE); 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxexpansion->endExpansionCE = NULL; 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *pendexpansionce = maxexpansion->endExpansionCE; 656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int pos = maxexpansion->position; 657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (pos > 0) { 659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pos --; 660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*(pendexpansionce 
+ pos) == endexpansion) { 661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return maxexpansion->position; 662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(pendexpansionce + maxexpansion->position) = endexpansion; 666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(maxexpansion->isV + maxexpansion->position) = isV; 667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxexpansion->position ++; 668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return maxexpansion->position; 670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void ContrEndCPSet(uint8_t *table, UChar c) { 674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t hash; 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *htByte; 676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru hash = c; 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) { 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256; 680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru htByte = &table[hash>>3]; 682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *htByte |= (1 << (hash & 7)); 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void unsafeCPSet(uint8_t *table, UChar c) { 687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t hash; 688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *htByte; 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru hash = c; 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) { 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (hash >= 0xd800 && hash <= 0xf8ff) { 693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Part of a surrogate, or in private use area. */ 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* These don't go in the table */ 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256; 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru htByte = &table[hash>>3]; 700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *htByte |= (1 << (hash & 7)); 701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void 704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_createCMTable(tempUCATable *t, int32_t noOfCM, UErrorCode *status) { 705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->cmLookup = (CombinClassTable *)uprv_malloc(sizeof(CombinClassTable)); 706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t->cmLookup==NULL) { 
707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->cmLookup->cPoints=(UChar *)uprv_malloc(noOfCM*sizeof(UChar)); 711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t->cmLookup->cPoints ==NULL) { 712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(t->cmLookup); 713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->cmLookup = NULL; 714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->cmLookup->size=noOfCM; 719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memset(t->cmLookup->index, 0, sizeof(t->cmLookup->index)); 720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void 725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_copyCMTable(tempUCATable *t, UChar *cm, uint16_t *index) { 726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t count=0; 727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<256; ++i) { 729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (index[i]>0) { 730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 
cPoints is ordered by combining class value. 731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(t->cmLookup->cPoints+count, cm+(i<<8), index[i]*sizeof(UChar)); 732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count += index[i]; 733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->cmLookup->index[i]=count; 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 1. to the UnsafeCP hash table, add all chars with combining class != 0 */ 740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2. build combining marks table for all chars with combining class != 0 */ 741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void uprv_uca_unsafeCPAddCCNZ(tempUCATable *t, UErrorCode *status) { 742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c; 744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t fcd; // Hi byte is lead combining class. 745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // lo byte is trailing combing class. 
746b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const uint16_t *fcdTrieIndex; 747b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 fcdHighStart; 748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool buildCMTable = (t->cmLookup==NULL); // flag for building combining class table 749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *cm=NULL; 750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t index[256]; 751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t count=0; 752b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status); 753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(*status)) { 754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (buildCMTable) { 758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cm==NULL) { 759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cm = (UChar *)uprv_malloc(sizeof(UChar)*UCOL_MAX_CM_TAB); 760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cm==NULL) { 761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memset(index, 0, sizeof(index)); 766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (c=0; c<0xffff; c++) { 768b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru fcd = unorm_getFCD16(fcdTrieIndex, 
c); 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (fcd >= 0x100 || // if the leading combining class(c) > 0 || 770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (UTF_IS_LEAD(c) && fcd != 0)) {// c is a leading surrogate with some FCD data 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (buildCMTable) { 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t cClass = fcd & 0xff; 773c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru //uint32_t temp=(cClass<<8)+index[cClass]; 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cm[(cClass<<8)+index[cClass]] = c; // 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index[cClass]++; 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count++; 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru unsafeCPSet(t->unsafeCP, c); 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // copy to cm table 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (buildCMTable) { 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_uca_createCMTable(t, count, status); 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status)) { 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cm!=NULL) { 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(cm); 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_uca_copyCMTable(t, cm, index); 
792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(t->prefixLookup != NULL) { 795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i = -1; 796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UHashElement *e = NULL; 797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCAElements *element = NULL; 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar NFCbuf[256]; 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t NFCbufLen = 0; 800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((e = uhash_nextElement(t->prefixLookup, &i)) != NULL) { 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element = (UCAElements *)e->value.pointer; 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // codepoints here are in the NFD form. We need to add the 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // first code point of the NFC form to unsafe, because 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // strcoll needs to backup over them. 
805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru NFCbufLen = unorm_normalize(element->cPoints, element->cSize, UNORM_NFC, 0, 806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru NFCbuf, 256, status); 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru unsafeCPSet(t->unsafeCP, NFCbuf[0]); 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cm!=NULL) { 812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(cm); 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic uint32_t uprv_uca_addPrefix(tempUCATable *t, uint32_t CE, 817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCAElements *element, UErrorCode *status) 818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // currently the longest prefix we're supporting in Japanese is two characters 820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // long. Although this table could quite easily mimic complete contraction stuff 821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // there is no good reason to make a general solution, as it would require some 822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // error prone messing. 
823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CntTable *contractions = t->contractions; 824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 cp; 825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t cpsize = 0; 826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *oldCP = element->cPoints; 827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t oldCPSize = element->cSize; 828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru contractions->currentTag = SPEC_PROC_TAG; 831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // here, we will normalize & add prefix to the table. 833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t j = 0; 834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCOL_DEBUG 835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(j=0; j<element->cSize; j++) { 836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stdout, "CP: %04X ", element->cPoints[j]); 837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stdout, "El: %08X Pref: ", CE); 839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(j=0; j<element->prefixSize; j++) { 840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stdout, "%04X ", element->prefix[j]); 841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stdout, "%08X ", element->mapCE); 843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (j = 1; 
j<element->prefixSize; j++) { /* First add NFD prefix chars to unsafe CP hash table */ 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Unless it is a trail surrogate, which is handled algoritmically and 847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // shouldn't take up space in the table. 848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!(UTF_IS_TRAIL(element->prefix[j]))) { 849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru unsafeCPSet(t->unsafeCP, element->prefix[j]); 850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar tempPrefix = 0; 854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(j = 0; j < /*nfcSize*/element->prefixSize/2; j++) { // prefixes are going to be looked up backwards 856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // therefore, we will promptly reverse the prefix buffer... 
857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tempPrefix = *(/*nfcBuffer*/element->prefix+element->prefixSize-j-1); 858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(/*nfcBuffer*/element->prefix+element->prefixSize-j-1) = element->prefix[j]; 859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element->prefix[j] = tempPrefix; 860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCOL_DEBUG 863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stdout, "Reversed: "); 864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(j=0; j<element->prefixSize; j++) { 865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stdout, "%04X ", element->prefix[j]); 866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stdout, "%08X\n", element->mapCE); 868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the first codepoint is also unsafe, as it forms a 'contraction' with the prefix 871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!(UTF_IS_TRAIL(element->cPoints[0]))) { 872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru unsafeCPSet(t->unsafeCP, element->cPoints[0]); 873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Maybe we need this... To handle prefixes completely in the forward direction... 
876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //if(element->cSize == 1) { 877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // if(!(UTF_IS_TRAIL(element->cPoints[0]))) { 878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // ContrEndCPSet(t->contrEndCP, element->cPoints[0]); 879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // } 880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //} 881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element->cPoints = element->prefix; 883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element->cSize = element->prefixSize; 884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Add the last char of the contraction to the contraction-end hash table. 886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // unless it is a trail surrogate, which is handled algorithmically and 887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // shouldn't be in the table 888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!(UTF_IS_TRAIL(element->cPoints[element->cSize -1]))) { 889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ContrEndCPSet(t->contrEndCP, element->cPoints[element->cSize -1]); 890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // First we need to check if contractions starts with a surrogate 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTF_NEXT_CHAR(element->cPoints, cpsize, element->cSize, cp); 894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If there are any Jamos in the contraction, we should turn on special 
896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // processing for Jamos 897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(UCOL_ISJAMO(element->prefix[0])) { 898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->image->jamoSpecial = TRUE; 899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* then we need to deal with it */ 901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* we could aready have something in table - or we might not */ 902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!isPrefix(CE)) { 904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* if it wasn't contraction, we wouldn't end up here*/ 905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t firstContractionOffset = 0; 906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru firstContractionOffset = uprv_cnttab_addContraction(contractions, UPRV_CNTTAB_NEWELEMENT, 0, CE, status); 907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status); 908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_cnttab_addContraction(contractions, firstContractionOffset, *element->prefix, newCE, status); 909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_cnttab_addContraction(contractions, firstContractionOffset, 0xFFFF, CE, status); 910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CE = constructContractCE(SPEC_PROC_TAG, firstContractionOffset); 911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { /* we are adding to existing contraction */ 912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* there were already some elements in the table, so we need to add a new contraction */ 
913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Two things can happen here: either the codepoint is already in the table, or it is not */ 914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t position = uprv_cnttab_findCP(contractions, CE, *element->prefix, status); 915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(position > 0) { /* if it is we just continue down the chain */ 916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t eCE = uprv_cnttab_getCE(contractions, CE, position, status); 917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t newCE = uprv_uca_processContraction(contractions, element, eCE, status); 918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_cnttab_setContraction(contractions, CE, position, *(element->prefix), newCE, status); 919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { /* if it isn't, we will have to create a new sequence */ 920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status); 921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_cnttab_insertContraction(contractions, CE, *(element->prefix), element->mapCE, status); 922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element->cPoints = oldCP; 926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element->cSize = oldCPSize; 927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return CE; 929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Note 
regarding surrogate handling: We are interested only in the single 932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// or leading surrogates in a contraction. If a surrogate is somewhere else 933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// in the contraction, it is going to be handled as a pair of code units, 934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// as it doesn't affect the performance AND handling surrogates specially 935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// would complicate code way too much. 936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic uint32_t uprv_uca_addContraction(tempUCATable *t, uint32_t CE, 937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCAElements *element, UErrorCode *status) 938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CntTable *contractions = t->contractions; 940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 cp; 941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t cpsize = 0; 942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru contractions->currentTag = CONTRACTION_TAG; 944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // First we need to check if contractions starts with a surrogate 946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTF_NEXT_CHAR(element->cPoints, cpsize, element->cSize, cp); 947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(cpsize<element->cSize) { // This is a real contraction, if there are other characters after the first 949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t j = 0; 950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru for (j=1; j<element->cSize; j++) { /* First add contraction chars to unsafe CP hash table */ 951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Unless it is a trail surrogate, which is handled algoritmically and 952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // shouldn't take up space in the table. 953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!(UTF_IS_TRAIL(element->cPoints[j]))) { 954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru unsafeCPSet(t->unsafeCP, element->cPoints[j]); 955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Add the last char of the contraction to the contraction-end hash table. 958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // unless it is a trail surrogate, which is handled algorithmically and 959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // shouldn't be in the table 960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!(UTF_IS_TRAIL(element->cPoints[element->cSize -1]))) { 961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ContrEndCPSet(t->contrEndCP, element->cPoints[element->cSize -1]); 962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If there are any Jamos in the contraction, we should turn on special 965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // processing for Jamos 966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(UCOL_ISJAMO(element->cPoints[0])) { 967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->image->jamoSpecial = TRUE; 968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* then we need to deal 
with it */ 970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* we could aready have something in table - or we might not */ 971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element->cPoints+=cpsize; 972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element->cSize-=cpsize; 973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!isContraction(CE)) { 974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* if it wasn't contraction, we wouldn't end up here*/ 975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t firstContractionOffset = 0; 976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru firstContractionOffset = uprv_cnttab_addContraction(contractions, UPRV_CNTTAB_NEWELEMENT, 0, CE, status); 977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status); 978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_cnttab_addContraction(contractions, firstContractionOffset, *element->cPoints, newCE, status); 979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_cnttab_addContraction(contractions, firstContractionOffset, 0xFFFF, CE, status); 980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CE = constructContractCE(CONTRACTION_TAG, firstContractionOffset); 981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { /* we are adding to existing contraction */ 982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* there were already some elements in the table, so we need to add a new contraction */ 983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Two things can happen here: either the codepoint is already in the table, or it is not */ 984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t position = uprv_cnttab_findCP(contractions, CE, *element->cPoints, status); 
985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(position > 0) { /* if it is we just continue down the chain */ 986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t eCE = uprv_cnttab_getCE(contractions, CE, position, status); 987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t newCE = uprv_uca_processContraction(contractions, element, eCE, status); 988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_cnttab_setContraction(contractions, CE, position, *(element->cPoints), newCE, status); 989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { /* if it isn't, we will have to create a new sequence */ 990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status); 991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_cnttab_insertContraction(contractions, CE, *(element->cPoints), newCE, status); 992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element->cPoints-=cpsize; 995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element->cSize+=cpsize; 996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*ucmpe32_set(t->mapping, cp, CE);*/ 997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru utrie_set32(t->mapping, cp, CE); 998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(!isContraction(CE)) { /* this is just a surrogate, and there is no contraction */ 999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*ucmpe32_set(t->mapping, cp, element->mapCE);*/ 1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru utrie_set32(t->mapping, cp, element->mapCE); 1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { /* fill out the first stage of the contraction with 
the surrogate CE */ 1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_cnttab_changeContraction(contractions, CE, 0, element->mapCE, status); 1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_cnttab_changeContraction(contractions, CE, 0xFFFF, element->mapCE, status); 1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return CE; 1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic uint32_t uprv_uca_processContraction(CntTable *contractions, UCAElements *element, uint32_t existingCE, UErrorCode *status) { 1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t firstContractionOffset = 0; 1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // uint32_t contractionElement = UCOL_NOT_FOUND; 1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status)) { 1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UCOL_NOT_FOUND; 1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* end of recursion */ 1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(element->cSize == 1) { 1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(isCntTableElement(existingCE) && ((UColCETags)getCETag(existingCE) == contractions->currentTag)) { 1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_cnttab_changeContraction(contractions, existingCE, 0, element->mapCE, status); 1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
uprv_cnttab_changeContraction(contractions, existingCE, 0xFFFF, element->mapCE, status); 1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return existingCE; 1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return element->mapCE; /*can't do just that. existingCe might be a contraction, meaning that we need to do another step */ 1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* this recursion currently feeds on the only element we have... We will have to copy it in order to accomodate */ 1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* for both backward and forward cycles */ 1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* we encountered either an empty space or a non-contraction element */ 1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* this means we are constructing a new contraction sequence */ 1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element->cPoints++; 1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element->cSize--; 1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!isCntTableElement(existingCE)) { 1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* if it wasn't contraction, we wouldn't end up here*/ 1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru firstContractionOffset = uprv_cnttab_addContraction(contractions, UPRV_CNTTAB_NEWELEMENT, 0, existingCE, status); 1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status); 
1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_cnttab_addContraction(contractions, firstContractionOffset, *element->cPoints, newCE, status); 1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_cnttab_addContraction(contractions, firstContractionOffset, 0xFFFF, existingCE, status); 1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru existingCE = constructContractCE(contractions->currentTag, firstContractionOffset); 1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { /* we are adding to existing contraction */ 1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* there were already some elements in the table, so we need to add a new contraction */ 1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Two things can happen here: either the codepoint is already in the table, or it is not */ 1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t position = uprv_cnttab_findCP(contractions, existingCE, *element->cPoints, status); 1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(position > 0) { /* if it is we just continue down the chain */ 1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t eCE = uprv_cnttab_getCE(contractions, existingCE, position, status); 1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t newCE = uprv_uca_processContraction(contractions, element, eCE, status); 1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_cnttab_setContraction(contractions, existingCE, position, *(element->cPoints), newCE, status); 1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { /* if it isn't, we will have to create a new sequence */ 1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status); 1052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
uprv_cnttab_insertContraction(contractions, existingCE, *(element->cPoints), newCE, status); 1053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element->cPoints--; 1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element->cSize++; 1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return existingCE; 1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic uint32_t uprv_uca_finalizeAddition(tempUCATable *t, UCAElements *element, UErrorCode *status) { 1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t CE = UCOL_NOT_FOUND; 1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This should add a completely ignorable element to the 1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // unsafe table, so that backward iteration will skip 1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // over it when treating contractions. 
1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t i = 0; 1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(element->mapCE == 0) { 1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i = 0; i < element->cSize; i++) { 1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!UTF_IS_TRAIL(element->cPoints[i])) { 1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru unsafeCPSet(t->unsafeCP, element->cPoints[i]); 1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(element->cSize > 1) { /* we're adding a contraction */ 1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t i = 0; 1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 cp; 1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTF_NEXT_CHAR(element->cPoints, i, element->cSize, cp); 1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*CE = ucmpe32_get(t->mapping, cp);*/ 1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CE = utrie_get32(t->mapping, cp, NULL); 1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CE = uprv_uca_addContraction(t, CE, element, status); 1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { /* easy case, */ 1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*CE = ucmpe32_get(t->mapping, element->cPoints[0]);*/ 1084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CE = utrie_get32(t->mapping, element->cPoints[0], NULL); 1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( 
CE != UCOL_NOT_FOUND) { 1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(isCntTableElement(CE) /*isContraction(CE)*/) { /* adding a non contraction element (thai, expansion, single) to already existing contraction */ 1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!isPrefix(element->mapCE)) { // we cannot reenter prefix elements - as we are going to create a dead loop 1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Only expansions and regular CEs can go here... Contractions will never happen in this place 1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_cnttab_setContraction(t->contractions, CE, 0, 0, element->mapCE, status); 1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* This loop has to change the CE at the end of contraction REDO!*/ 1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_cnttab_changeLastCE(t->contractions, CE, element->mapCE, status); 1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*ucmpe32_set(t->mapping, element->cPoints[0], element->mapCE);*/ 1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru utrie_set32(t->mapping, element->cPoints[0], element->mapCE); 109727f654740f2a26ad62a5c155af9199af9e69b889claireho if ((element->prefixSize!=0) && (!isSpecial(CE) || (getCETag(CE)!=IMPLICIT_TAG))) { 1098c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UCAElements *origElem = (UCAElements *)uprv_malloc(sizeof(UCAElements)); 1099c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* test for NULL */ 1100c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (origElem== NULL) { 1101c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 1102c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 
1103c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1104c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* copy the original UCA value */ 1105c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru origElem->prefixSize = 0; 1106c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru origElem->prefix = NULL; 1107c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru origElem->cPoints = origElem->uchars; 1108c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru origElem->cPoints[0] = element->cPoints[0]; 1109c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru origElem->cSize = 1; 1110c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru origElem->CEs[0]=CE; 1111c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru origElem->mapCE=CE; 1112c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru origElem->noOfCEs=1; 1113c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_uca_finalizeAddition(t, origElem, status); 1114c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_free(origElem); 1115c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCOL_DEBUG 1117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "Warning - trying to overwrite existing data %08X for cp %04X with %08X\n", CE, element->cPoints[0], element->CEs[0]); 1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //*status = U_ILLEGAL_ARGUMENT_ERROR; 1119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*ucmpe32_set(t->mapping, element->cPoints[0], element->mapCE);*/ 1123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru utrie_set32(t->mapping, element->cPoints[0], 
element->mapCE); 1124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return CE; 1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* This adds a read element, while testing for existence */ 1130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI uint32_t U_EXPORT2 1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_addAnElement(tempUCATable *t, UCAElements *element, UErrorCode *status) { 1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_NAMESPACE_USE 1133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ExpansionTable *expansions = t->expansions; 1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t i = 1; 1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t expansion = 0; 1138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t CE; 1139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status)) { 1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0xFFFF; 1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element->mapCE = 0; // clear mapCE so that we can catch expansions 1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(element->noOfCEs == 1) { 1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element->mapCE 
= element->CEs[0]; 1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* ICU 2.1 long primaries */ 1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* unfortunately, it looks like we have to look for a long primary here */ 1151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* since in canonical closure we are going to hit some long primaries from */ 1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* the first phase, and they will come back as continuations/expansions */ 1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* destroying the effect of the previous opitimization */ 1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* A long primary is a three byte primary with starting secondaries and tertiaries */ 1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* It can appear in long runs of only primary differences (like east Asian tailorings) */ 1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* also, it should not be an expansion, as expansions would break with this */ 1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This part came in from ucol_bld.cpp 1158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //if(tok->expansion == 0 1159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //&& noOfBytes[0] == 3 && noOfBytes[1] == 1 && noOfBytes[2] == 1 1160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //&& CEparts[1] == (UCOL_BYTE_COMMON << 24) && CEparts[2] == (UCOL_BYTE_COMMON << 24)) { 1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* we will construct a special CE that will go unchanged to the table */ 1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(element->noOfCEs == 2 // a two CE expansion 1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru && isContinuation(element->CEs[1]) // 
which is a continuation 1164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru && (element->CEs[1] & (~(0xFF << 24 | UCOL_CONTINUATION_MARKER))) == 0 // that has only primaries in continuation, 1165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru && (((element->CEs[0]>>8) & 0xFF) == UCOL_BYTE_COMMON) // a common secondary 1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru && ((element->CEs[0] & 0xFF) == UCOL_BYTE_COMMON) // and a common tertiary 1167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ) 1168c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru { 1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCOL_DEBUG 1170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stdout, "Long primary %04X\n", element->cPoints[0]); 1171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru element->mapCE = UCOL_SPECIAL_FLAG | (LONG_PRIMARY_TAG<<24) // a long primary special 1173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru | ((element->CEs[0]>>8) & 0xFFFF00) // first and second byte of primary 1174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru | ((element->CEs[1]>>24) & 0xFF); // third byte of primary 1175c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1176c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru else { 1177c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (EXPANSION_TAG<<UCOL_TAG_SHIFT) 117827f654740f2a26ad62a5c155af9199af9e69b889claireho | (((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4) 117927f654740f2a26ad62a5c155af9199af9e69b889claireho & 0xFFFFF0)); 1180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1181c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(i = 1; i<element->noOfCEs; i++) { 
1182c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_uca_addExpansion(expansions, element->CEs[i], status); 1183c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1184c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(element->noOfCEs <= 0xF) { 1185c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru expansion |= element->noOfCEs; 1186c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 1187c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_uca_addExpansion(expansions, 0, status); 1188c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru element->mapCE = expansion; 1190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_uca_setMaxExpansion(element->CEs[element->noOfCEs - 1], 1191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (uint8_t)element->noOfCEs, 1192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru t->maxExpansions, 1193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status); 1194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(UCOL_ISJAMO(element->cPoints[0])) { 1195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru t->image->jamoSpecial = TRUE; 1196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_uca_setMaxJamoExpansion(element->cPoints[0], 1197c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru element->CEs[element->noOfCEs - 1], 1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (uint8_t)element->noOfCEs, 1199c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru t->maxJamoExpansions, 1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status); 1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1202c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(*status)) { 1203c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 
1204c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1205c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We treat digits differently - they are "uber special" and should be 1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // processed differently if numeric collation is on. 1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 uniChar = 0; 1211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //printElement(element); 1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((element->cSize == 2) && U16_IS_LEAD(element->cPoints[0])){ 1213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uniChar = U16_GET_SUPPLEMENTARY(element->cPoints[0], element->cPoints[1]); 1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (element->cSize == 1){ 1215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uniChar = element->cPoints[0]; 1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Here, we either have one normal CE OR mapCE is set. Therefore, we stuff only 1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // one element to the expansion buffer. When we encounter a digit and we don't 1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // do numeric collation, we will just pick the CE we have and break out of case 1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (see ucol.cpp ucol_prv_getSpecialCE && ucol_prv_getSpecialPrevCE). If we picked 1222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // a special, further processing will occur. 
If it's a simple CE, we'll return due 1223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // to how the loop is constructed. 1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (uniChar != 0 && u_isdigit(uniChar)){ 1225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (DIGIT_TAG<<UCOL_TAG_SHIFT) | 1); // prepare the element 1226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(element->mapCE) { // if there is an expansion, we'll pick it here 1227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expansion |= ((uprv_uca_addExpansion(expansions, element->mapCE, status)+(headersize>>2))<<4); 1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expansion |= ((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4); 1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element->mapCE = expansion; 1232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Need to go back to the beginning of the digit string if in the middle! 1234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(uniChar <= 0xFFFF) { // supplementaries are always unsafe. API takes UChars 1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru unsafeCPSet(t->unsafeCP, (UChar)uniChar); 1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // here we want to add the prefix structure. 1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // I will try to process it as a reverse contraction, if possible. 
1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // prefix buffer is already reversed. 1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(element->prefixSize!=0) { 1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We keep the seen prefix starter elements in a hashtable 1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we need it to be able to distinguish between the simple 1246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // codepoints and prefix starters. Also, we need to use it 1247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // for canonical closure. 1248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCAElements *composed = (UCAElements *)uprv_malloc(sizeof(UCAElements)); 1250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */ 1251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (composed == NULL) { 1252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 1253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(composed, element, sizeof(UCAElements)); 1256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru composed->cPoints = composed->uchars; 1257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru composed->prefix = composed->prefixChars; 1258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru composed->prefixSize = unorm_normalize(element->prefix, element->prefixSize, UNORM_NFC, 0, composed->prefix, 128, status); 1260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
1261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(t->prefixLookup != NULL) { 1263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCAElements *uCE = (UCAElements *)uhash_get(t->prefixLookup, element); 1264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(uCE != NULL) { // there is already a set of code points here 1265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element->mapCE = uprv_uca_addPrefix(t, uCE->mapCE, element, status); 1266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { // no code points, so this spot is clean 1267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element->mapCE = uprv_uca_addPrefix(t, UCOL_NOT_FOUND, element, status); 1268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uCE = (UCAElements *)uprv_malloc(sizeof(UCAElements)); 1269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */ 1270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (uCE == NULL) { 1271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 1272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(uCE, element, sizeof(UCAElements)); 1275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uCE->cPoints = uCE->uchars; 1276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uhash_put(t->prefixLookup, uCE, uCE, status); 1277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(composed->prefixSize != element->prefixSize || uprv_memcmp(composed->prefix, element->prefix, element->prefixSize)) { 1279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // do it! 
1280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru composed->mapCE = uprv_uca_addPrefix(t, element->mapCE, composed, status); 1281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(composed); 1284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We need to use the canonical iterator here 1287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the way we do it is to generate the canonically equivalent strings 1288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // for the contraction and then add the sequences that pass FCD check 1289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(element->cSize > 1 && !(element->cSize==2 && UTF16_IS_LEAD(element->cPoints[0]) && UTF16_IS_TRAIL(element->cPoints[1]))) { // this is a contraction, we should check whether a composed form should also be included 1290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString source(element->cPoints, element->cSize); 1291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CanonicalIterator it(source, *status); 1292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source = it.next(); 1293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(!source.isBogus()) { 1294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(Normalizer::quickCheck(source, UNORM_FCD, *status) != UNORM_NO) { 1295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element->cSize = source.extract(element->cPoints, 128, *status); 1296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_uca_finalizeAddition(t, element, status); 1297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
1298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source = it.next(); 1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CE = element->mapCE; 1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CE = uprv_uca_finalizeAddition(t, element, status); 1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return CE; 1306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*void uprv_uca_getMaxExpansionJamo(CompactEIntArray *mapping, */ 1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void uprv_uca_getMaxExpansionJamo(UNewTrie *mapping, 1311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru MaxExpansionTable *maxexpansion, 1312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru MaxJamoExpansionTable *maxjamoexpansion, 1313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool jamospecial, 1314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) 1315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 1316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint32_t VBASE = 0x1161; 1317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint32_t TBASE = 0x11A8; 1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint32_t VCOUNT = 21; 1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint32_t TCOUNT = 28; 1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
1321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t v = VBASE + VCOUNT - 1; 1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t t = TBASE + TCOUNT - 1; 1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t ce; 1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (v >= VBASE) { 1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*ce = ucmpe32_get(mapping, v);*/ 1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ce = utrie_get32(mapping, v, NULL); 1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ce < UCOL_SPECIAL_FLAG) { 1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_uca_setMaxExpansion(ce, 2, maxexpansion, status); 1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru v --; 1332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (t >= TBASE) 1335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*ce = ucmpe32_get(mapping, t);*/ 1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ce = utrie_get32(mapping, t, NULL); 1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ce < UCOL_SPECIAL_FLAG) { 1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_uca_setMaxExpansion(ce, 3, maxexpansion, status); 1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t --; 1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* According to the docs, 99% of the time, the Jamo will not be special */ 
1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (jamospecial) { 1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* gets the max expansion in all unicode characters */ 1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int count = maxjamoexpansion->position; 1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t maxTSize = (uint8_t)(maxjamoexpansion->maxLSize + 1348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxjamoexpansion->maxVSize + 1349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxjamoexpansion->maxTSize); 1350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t maxVSize = (uint8_t)(maxjamoexpansion->maxLSize + 1351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxjamoexpansion->maxVSize); 1352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (count > 0) { 1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count --; 1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (*(maxjamoexpansion->isV + count) == TRUE) { 1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_uca_setMaxExpansion( 1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(maxjamoexpansion->endExpansionCE + count), 1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxVSize, maxexpansion, status); 1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 1361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_uca_setMaxExpansion( 1362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(maxjamoexpansion->endExpansionCE + count), 1363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxTSize, maxexpansion, status); 1364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_BEGIN 1370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline uint32_t U_CALLCONV 1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerugetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) 1372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 1373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t value; 1374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t tag; 1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 limit; 1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool inBlockZero; 1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit=start+0x400; 1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(start<limit) { 1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru value=utrie_get32(trie, start, &inBlockZero); 1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tag = getCETag(value); 1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(inBlockZero == TRUE) { 1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start+=UTRIE_DATA_BLOCK_LENGTH; 1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(!(isSpecial(value) && (tag == IMPLICIT_TAG || tag == NOT_FOUND_TAG))) { 1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* These are values that are starting in either UCA (IMPLICIT_TAG) or in the 1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * tailorings (NOT_FOUND_TAG). 
Presence of these tags means that there is 1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * nothing in this position and that it should be skipped. 1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCOL_DEBUG 1390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static int32_t count = 1; 1391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stdout, "%i, Folded %08X, value %08X\n", count++, start, value); 1392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (uint32_t)(UCOL_SPECIAL_FLAG | (SURROGATE_TAG<<24) | offset); 1394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++start; 1396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_END 1401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCOL_DEBUG 1403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// This is a debug function to print the contents of a trie. 
1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// It is used in conjuction with the code around utrie_unserialize call 140527f654740f2a26ad62a5c155af9199af9e69b889clairehoUBool enumRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) { 1406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(start<0x10000) { 1407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stdout, "%08X, %08X, %08X\n", start, limit, value); 1408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stdout, "%08X=%04X %04X, %08X=%04X %04X, %08X\n", start, UTF16_LEAD(start), UTF16_TRAIL(start), limit, UTF16_LEAD(limit), UTF16_TRAIL(limit), value); 1410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 141127f654740f2a26ad62a5c155af9199af9e69b889claireho return TRUE; 1412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t 1415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerumyGetFoldingOffset(uint32_t data) { 1416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(data > UCOL_NOT_FOUND && getCETag(data) == SURROGATE_TAG) { 1417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (data&0xFFFFFF); 1418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UCATableHeader* U_EXPORT2 1425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queruuprv_uca_assembleTable(tempUCATable *t, UErrorCode *status) { 1426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*CompactEIntArray *mapping = t->mapping;*/ 1427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UNewTrie *mapping = t->mapping; 1428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ExpansionTable *expansions = t->expansions; 1429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CntTable *contractions = t->contractions; 1430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru MaxExpansionTable *maxexpansion = t->maxExpansions; 1431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*status)) { 1433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 1434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t beforeContractions = (uint32_t)((headersize+paddedsize(expansions->position*sizeof(uint32_t)))/sizeof(UChar)); 1437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t contractionsSize = 0; 1439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru contractionsSize = uprv_cnttab_constructTable(contractions, beforeContractions, status); 1440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* the following operation depends on the trie data. 
Therefore, we have to do it before */ 1442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* the trie is compacted */ 1443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* sets jamo expansions */ 1444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_uca_getMaxExpansionJamo(mapping, maxexpansion, t->maxJamoExpansions, 1445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->image->jamoSpecial, status); 1446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*ucmpe32_compact(mapping);*/ 1448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*UMemoryStream *ms = uprv_mstrm_openNew(8192);*/ 1449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*int32_t mappingSize = ucmpe32_flattenMem(mapping, ms);*/ 1450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*const uint8_t *flattened = uprv_mstrm_getBuffer(ms, &mappingSize);*/ 1451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // After setting the jamo expansions, compact the trie and get the needed size 1453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t mappingSize = utrie_serialize(mapping, NULL, 0, getFoldedValue /*getFoldedValue*/, FALSE, status); 1454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t tableOffset = 0; 1456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *dataStart; 1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* TODO: LATIN1 array is now in the utrie - it should be removed from the calculation */ 1459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t toAllocate =(uint32_t)(headersize+ 
1461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru paddedsize(expansions->position*sizeof(uint32_t))+ 1462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru paddedsize(mappingSize)+ 1463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru paddedsize(contractionsSize*(sizeof(UChar)+sizeof(uint32_t)))+ 1464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //paddedsize(0x100*sizeof(uint32_t)) /* Latin1 is now included in the trie */ 1465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* maxexpansion array */ 1466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru + paddedsize(maxexpansion->position * sizeof(uint32_t)) + 1467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* maxexpansion size array */ 1468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru paddedsize(maxexpansion->position * sizeof(uint8_t)) + 1469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru paddedsize(UCOL_UNSAFECP_TABLE_SIZE) + /* Unsafe chars */ 1470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru paddedsize(UCOL_UNSAFECP_TABLE_SIZE)); /* Contraction Ending chars */ 1471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dataStart = (uint8_t *)uprv_malloc(toAllocate); 1474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */ 1475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (dataStart == NULL) { 1476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_MEMORY_ALLOCATION_ERROR; 1477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 1478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCATableHeader *myData = (UCATableHeader *)dataStart; 
1481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Please, do reset all the fields! 1482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memset(dataStart, 0, toAllocate); 1483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Make sure we know this is reset 1484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru myData->magic = UCOL_HEADER_MAGIC; 1485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru myData->isBigEndian = U_IS_BIG_ENDIAN; 1486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru myData->charSetFamily = U_CHARSET_FAMILY; 1487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru myData->formatVersion[0] = UCA_FORMAT_VERSION_0; 1488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru myData->formatVersion[1] = UCA_FORMAT_VERSION_1; 1489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru myData->formatVersion[2] = UCA_FORMAT_VERSION_2; 1490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru myData->formatVersion[3] = UCA_FORMAT_VERSION_3; 1491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru myData->jamoSpecial = t->image->jamoSpecial; 1492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Don't copy stuff from UCA header! 
1494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //uprv_memcpy(myData, t->image, sizeof(UCATableHeader)); 1495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru myData->contractionSize = contractionsSize; 1497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tableOffset += (uint32_t)(paddedsize(sizeof(UCATableHeader))); 1499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru myData->options = tableOffset; 1501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(dataStart+tableOffset, t->options, sizeof(UColOptionSet)); 1502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tableOffset += (uint32_t)(paddedsize(sizeof(UColOptionSet))); 1503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* copy expansions */ 1505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*myData->expansion = (uint32_t *)dataStart+tableOffset;*/ 1506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru myData->expansion = tableOffset; 1507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(dataStart+tableOffset, expansions->CEs, expansions->position*sizeof(uint32_t)); 1508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tableOffset += (uint32_t)(paddedsize(expansions->position*sizeof(uint32_t))); 1509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* contractions block */ 1511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(contractionsSize != 0) { 1512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* copy contraction index */ 1513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
/*myData->contractionIndex = (UChar *)(dataStart+tableOffset);*/ 1514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru myData->contractionIndex = tableOffset; 1515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(dataStart+tableOffset, contractions->codePoints, contractionsSize*sizeof(UChar)); 1516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tableOffset += (uint32_t)(paddedsize(contractionsSize*sizeof(UChar))); 1517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* copy contraction collation elements */ 1519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*myData->contractionCEs = (uint32_t *)(dataStart+tableOffset);*/ 1520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru myData->contractionCEs = tableOffset; 1521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(dataStart+tableOffset, contractions->CEs, contractionsSize*sizeof(uint32_t)); 1522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tableOffset += (uint32_t)(paddedsize(contractionsSize*sizeof(uint32_t))); 1523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru myData->contractionIndex = 0; 1525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru myData->contractionCEs = 0; 1526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* copy mapping table */ 1529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*myData->mappingPosition = dataStart+tableOffset;*/ 1530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*myData->mappingPosition = tableOffset;*/ 1531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*uprv_memcpy(dataStart+tableOffset, flattened, mappingSize);*/ 
1532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru myData->mappingPosition = tableOffset; 1534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru utrie_serialize(mapping, dataStart+tableOffset, toAllocate-tableOffset, getFoldedValue, FALSE, status); 1535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCOL_DEBUG 1536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This is debug code to dump the contents of the trie. It needs two functions defined above 1537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTrie UCAt = { 0 }; 1539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t trieWord; 1540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru utrie_unserialize(&UCAt, dataStart+tableOffset, 9999999, status); 1541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCAt.getFoldingOffset = myGetFoldingOffset; 1542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_SUCCESS(*status)) { 1543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru utrie_enum(&UCAt, NULL, enumRange, NULL); 1544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 154527f654740f2a26ad62a5c155af9199af9e69b889claireho trieWord = UTRIE_GET32_FROM_LEAD(&UCAt, 0xDC01); 1546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tableOffset += paddedsize(mappingSize); 1549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i = 0; 1552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* copy max 
expansion table */ 1554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru myData->endExpansionCE = tableOffset; 1555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru myData->endExpansionCECount = maxexpansion->position - 1; 1556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* not copying the first element which is a dummy */ 1557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(dataStart + tableOffset, maxexpansion->endExpansionCE + 1, 1558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (maxexpansion->position - 1) * sizeof(uint32_t)); 1559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tableOffset += (uint32_t)(paddedsize((maxexpansion->position)* sizeof(uint32_t))); 1560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru myData->expansionCESize = tableOffset; 1561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(dataStart + tableOffset, maxexpansion->expansionCESize + 1, 1562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (maxexpansion->position - 1) * sizeof(uint8_t)); 1563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tableOffset += (uint32_t)(paddedsize((maxexpansion->position)* sizeof(uint8_t))); 1564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Unsafe chars table. Finish it off, then copy it. */ 1566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_uca_unsafeCPAddCCNZ(t, status); 1567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t->UCA != 0) { /* Or in unsafebits from UCA, making a combined table. 
*/ 1568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<UCOL_UNSAFECP_TABLE_SIZE; i++) { 1569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->unsafeCP[i] |= t->UCA->unsafeCP[i]; 1570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru myData->unsafeCP = tableOffset; 1573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(dataStart + tableOffset, t->unsafeCP, UCOL_UNSAFECP_TABLE_SIZE); 1574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tableOffset += paddedsize(UCOL_UNSAFECP_TABLE_SIZE); 1575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Finish building Contraction Ending chars hash table and then copy it out. */ 1578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t->UCA != 0) { /* Or in unsafebits from UCA, making a combined table. 
*/ 1579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<UCOL_UNSAFECP_TABLE_SIZE; i++) { 1580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->contrEndCP[i] |= t->UCA->contrEndCP[i]; 1581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru myData->contrEndCP = tableOffset; 1584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(dataStart + tableOffset, t->contrEndCP, UCOL_UNSAFECP_TABLE_SIZE); 1585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tableOffset += paddedsize(UCOL_UNSAFECP_TABLE_SIZE); 1586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(tableOffset != toAllocate) { 1588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCOL_DEBUG 1589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "calculation screwup!!! 
Expected to write %i but wrote %i instead!!!\n", toAllocate, tableOffset); 1590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *status = U_INTERNAL_PROGRAM_ERROR; 1592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(dataStart); 1593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru myData->size = tableOffset; 1597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* This should happen upon ressurection */ 1598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*const uint8_t *mapPosition = (uint8_t*)myData+myData->mappingPosition;*/ 1599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*uprv_mstrm_close(ms);*/ 1600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return myData; 1601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustruct enumStruct { 1605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tempUCATable *t; 1606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCollator *tempColl; 1607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCollationElements* colEl; 160850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const Normalizer2Impl *nfcImpl; 160927f654740f2a26ad62a5c155af9199af9e69b889claireho UnicodeSet *closed; 1610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t noOfClosures; 1611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status; 1612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 
1613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_BEGIN 1614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool U_CALLCONV 1615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru_enumCategoryRangeClosureCategory(const void *context, UChar32 start, UChar32 limit, UCharCategory type) { 1616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (type != U_UNASSIGNED && type != U_PRIVATE_USE_CHAR) { // if the range is assigned - we might ommit more categories later 1618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status = ((enumStruct *)context)->status; 1619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tempUCATable *t = ((enumStruct *)context)->t; 1620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCollator *tempColl = ((enumStruct *)context)->tempColl; 1621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCollationElements* colEl = ((enumStruct *)context)->colEl; 1622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCAElements el; 162350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar decompBuffer[4]; 162450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *decomp; 1625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t noOfDec = 0; 1626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 u32 = 0; 1628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar comp[2]; 1629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t len = 0; 1630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(u32 = start; u32 < limit; u32++) { 163250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho decomp = ((enumStruct *)context)->nfcImpl-> 163350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
getDecomposition(u32, decompBuffer, noOfDec); 1634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //if((noOfDec = unorm_normalize(comp, len, UNORM_NFD, 0, decomp, 256, status)) > 1 1635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //|| (noOfDec == 1 && *decomp != (UChar)u32)) 163650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(decomp != NULL) 1637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len = 0; 163950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U16_APPEND_UNSAFE(comp, len, u32); 1640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ucol_strcoll(tempColl, comp, len, decomp, noOfDec) != UCOL_EQUAL) { 1641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCOL_DEBUG 164227f654740f2a26ad62a5c155af9199af9e69b889claireho fprintf(stderr, "Closure: U+%04X -> ", u32); 164327f654740f2a26ad62a5c155af9199af9e69b889claireho UChar32 c; 164427f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t i = 0; 164527f654740f2a26ad62a5c155af9199af9e69b889claireho while(i < noOfDec) { 164627f654740f2a26ad62a5c155af9199af9e69b889claireho U16_NEXT(decomp, i, noOfDec, c); 164727f654740f2a26ad62a5c155af9199af9e69b889claireho fprintf(stderr, "%04X ", c); 164827f654740f2a26ad62a5c155af9199af9e69b889claireho } 164927f654740f2a26ad62a5c155af9199af9e69b889claireho fprintf(stderr, "\n"); 165027f654740f2a26ad62a5c155af9199af9e69b889claireho // print CEs for code point vs. 
decomposition 165127f654740f2a26ad62a5c155af9199af9e69b889claireho fprintf(stderr, "U+%04X CEs: ", u32); 165227f654740f2a26ad62a5c155af9199af9e69b889claireho UCollationElements *iter = ucol_openElements(tempColl, comp, len, status); 165327f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t ce; 165427f654740f2a26ad62a5c155af9199af9e69b889claireho while((ce = ucol_next(iter, status)) != UCOL_NULLORDER) { 165527f654740f2a26ad62a5c155af9199af9e69b889claireho fprintf(stderr, "%08X ", ce); 165627f654740f2a26ad62a5c155af9199af9e69b889claireho } 165727f654740f2a26ad62a5c155af9199af9e69b889claireho fprintf(stderr, "\nDecomp CEs: "); 165827f654740f2a26ad62a5c155af9199af9e69b889claireho ucol_setText(iter, decomp, noOfDec, status); 165927f654740f2a26ad62a5c155af9199af9e69b889claireho while((ce = ucol_next(iter, status)) != UCOL_NULLORDER) { 166027f654740f2a26ad62a5c155af9199af9e69b889claireho fprintf(stderr, "%08X ", ce); 1661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "\n"); 166327f654740f2a26ad62a5c155af9199af9e69b889claireho ucol_closeElements(iter); 1664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 166527f654740f2a26ad62a5c155af9199af9e69b889claireho if(((enumStruct *)context)->closed != NULL) { 166627f654740f2a26ad62a5c155af9199af9e69b889claireho ((enumStruct *)context)->closed->add(u32); 166727f654740f2a26ad62a5c155af9199af9e69b889claireho } 1668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((enumStruct *)context)->noOfClosures++; 166950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho el.cPoints = (UChar *)decomp; 1670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el.cSize = noOfDec; 1671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el.noOfCEs = 0; 1672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el.prefix = el.prefixChars; 1673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
el.prefixSize = 0; 1674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCAElements *prefix=(UCAElements *)uhash_get(t->prefixLookup, &el); 1676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el.cPoints = comp; 1677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el.cSize = len; 1678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el.prefix = el.prefixChars; 1679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el.prefixSize = 0; 1680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(prefix == NULL) { 1681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el.noOfCEs = 0; 1682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucol_setText(colEl, decomp, noOfDec, status); 1683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((el.CEs[el.noOfCEs] = ucol_next(colEl, status)) != (uint32_t)UCOL_NULLORDER) { 1684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el.noOfCEs++; 1685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el.noOfCEs = 1; 1688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el.CEs[0] = prefix->mapCE; 1689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This character uses a prefix. We have to add it 1690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // to the unsafe table, as it decomposed form is already 1691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // in. 
In Japanese, this happens for \u309e & \u30fe 1692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Since unsafeCPSet is static in ucol_elm, we are going 1693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // to wrap it up in the uprv_uca_unsafeCPAddCCNZ function 1694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_uca_addAnElement(t, &el, status); 1696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 1701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_END 1703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void 1705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_setMapCE(tempUCATable *t, UCAElements *element, UErrorCode *status) { 1706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t expansion = 0; 1707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t j; 1708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ExpansionTable *expansions = t->expansions; 1710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(element->noOfCEs == 2 // a two CE expansion 1711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru && isContinuation(element->CEs[1]) // which is a continuation 1712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru && (element->CEs[1] & (~(0xFF << 24 | UCOL_CONTINUATION_MARKER))) == 0 // that has only primaries in continuation, 
1713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru && (((element->CEs[0]>>8) & 0xFF) == UCOL_BYTE_COMMON) // a common secondary 1714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru && ((element->CEs[0] & 0xFF) == UCOL_BYTE_COMMON) // and a common tertiary 1715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 1716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element->mapCE = UCOL_SPECIAL_FLAG | (LONG_PRIMARY_TAG<<24) // a long primary special 1717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru | ((element->CEs[0]>>8) & 0xFFFF00) // first and second byte of primary 1718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru | ((element->CEs[1]>>24) & 0xFF); // third byte of primary 1719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (EXPANSION_TAG<<UCOL_TAG_SHIFT) 172127f654740f2a26ad62a5c155af9199af9e69b889claireho | (((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4) 172227f654740f2a26ad62a5c155af9199af9e69b889claireho & 0xFFFFF0)); 1723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(j = 1; j<(int32_t)element->noOfCEs; j++) { 1725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_uca_addExpansion(expansions, element->CEs[j], status); 1726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(element->noOfCEs <= 0xF) { 1728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expansion |= element->noOfCEs; 1729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_uca_addExpansion(expansions, 0, status); 1731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
1732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element->mapCE = expansion; 1733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_uca_setMaxExpansion(element->CEs[element->noOfCEs - 1], 1734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (uint8_t)element->noOfCEs, 1735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->maxExpansions, 1736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status); 1737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void 1741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_addFCD4AccentedContractions(tempUCATable *t, 1742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCollationElements* colEl, 1743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *data, 1744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t len, 1745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCAElements *el, 1746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) { 1747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar decomp[256], comp[256]; 1748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t decLen, compLen; 1749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru decLen = unorm_normalize(data, len, UNORM_NFD, 0, decomp, 256, status); 1751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru compLen = unorm_normalize(data, len, UNORM_NFC, 0, comp, 256, status); 1752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru decomp[decLen] = comp[compLen] = 0; 1753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
1754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el->cPoints = decomp; 1755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el->cSize = decLen; 1756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el->noOfCEs = 0; 1757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el->prefixSize = 0; 1758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el->prefix = el->prefixChars; 1759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCAElements *prefix=(UCAElements *)uhash_get(t->prefixLookup, el); 1761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el->cPoints = comp; 1762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el->cSize = compLen; 1763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el->prefix = el->prefixChars; 1764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el->prefixSize = 0; 1765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(prefix == NULL) { 1766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el->noOfCEs = 0; 1767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucol_setText(colEl, decomp, decLen, status); 1768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((el->CEs[el->noOfCEs] = ucol_next(colEl, status)) != (uint32_t)UCOL_NULLORDER) { 1769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el->noOfCEs++; 1770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_uca_setMapCE(t, el, status); 1772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_uca_addAnElement(t, el, status); 1773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Querustatic void 1777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_addMultiCMContractions(tempUCATable *t, 1778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCollationElements* colEl, 1779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tempTailorContext *c, 1780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCAElements *el, 1781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) { 1782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CombinClassTable *cmLookup = t->cmLookup; 1783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar newDecomp[256]; 1784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t maxComp, newDecLen; 1785b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 fcdHighStart; 1786b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const uint16_t *fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status); 1787b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 1788b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 1789b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1790b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int16_t curClass = (unorm_getFCD16(fcdTrieIndex, c->tailoringCM) & 0xff); 1791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CompData *precomp = c->precomp; 1792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t compLen = c->compLen; 1793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *comp = c->comp; 1794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxComp = c->precompLen; 1795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t j=0; j < maxComp; j++) { 1797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t count=0; 
1798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 1799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ( count == 0 ) { // Decompose the saved precomposed char. 1800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar temp[2]; 1801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru temp[0]=precomp[j].cp; 1802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru temp[1]=0; 1803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newDecLen = unorm_normalize(temp, 1, UNORM_NFD, 0, 1804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newDecomp, sizeof(newDecomp)/sizeof(UChar), status); 1805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newDecomp[newDecLen++] = cmLookup->cPoints[c->cmPos]; 1806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { // swap 2 combining marks when they are equal. 1808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(newDecomp, c->decomp, sizeof(UChar)*(c->decompLen)); 1809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newDecLen = c->decompLen; 1810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newDecomp[newDecLen++] = precomp[j].cClass; 1811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newDecomp[newDecLen] = 0; 1813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru compLen = unorm_normalize(newDecomp, newDecLen, UNORM_NFC, 0, 1814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru comp, 256, status); 1815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (compLen==1) { 1816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru comp[compLen++] = newDecomp[newDecLen++] = c->tailoringCM; 1817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru comp[compLen] = newDecomp[newDecLen] = 0; 
1818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el->cPoints = newDecomp; 1819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el->cSize = newDecLen; 1820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCAElements *prefix=(UCAElements *)uhash_get(t->prefixLookup, el); 1822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el->cPoints = c->comp; 1823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el->cSize = compLen; 1824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el->prefix = el->prefixChars; 1825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el->prefixSize = 0; 1826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(prefix == NULL) { 1827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el->noOfCEs = 0; 1828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucol_setText(colEl, newDecomp, newDecLen, status); 1829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((el->CEs[el->noOfCEs] = ucol_next(colEl, status)) != (uint32_t)UCOL_NULLORDER) { 1830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el->noOfCEs++; 1831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_uca_setMapCE(t, el, status); 1833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_uca_finalizeAddition(t, el, status); 1834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Save the current precomposed char and its class to find any 1836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // other combining mark combinations. 
1837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru precomp[c->precompLen].cp=comp[0]; 1838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru precomp[c->precompLen].cClass = curClass; 1839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c->precompLen++; 1840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while (++count<2 && (precomp[j].cClass == curClass)); 1843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void 1848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_addTailCanonicalClosures(tempUCATable *t, 1849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCollationElements* colEl, 1850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar baseCh, 1851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar cMark, 1852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCAElements *el, 1853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) { 1854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CombinClassTable *cmLookup = t->cmLookup; 1855b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 fcdHighStart; 1856b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const uint16_t *fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status); 1857b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if (U_FAILURE(*status)) { 1858b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return; 1859b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 
1860b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int16_t maxIndex = (unorm_getFCD16(fcdTrieIndex, cMark) & 0xff ); 1861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCAElements element; 1862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t *index; 1863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar decomp[256]; 1864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar comp[256]; 1865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CompData precomp[256]; // precomposed array 1866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t precompLen = 0; // count for precomp 1867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i, len, decompLen, curClass, replacedPos; 1868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tempTailorContext c; 1869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ( cmLookup == NULL ) { 1871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 1872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index = cmLookup->index; 1874b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t cClass=(unorm_getFCD16(fcdTrieIndex, cMark) & 0xff); 1875b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru maxIndex = (int32_t)index[(unorm_getFCD16(fcdTrieIndex, cMark) & 0xff)-1]; 1876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c.comp = comp; 1877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c.decomp = decomp; 1878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c.precomp = precomp; 1879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c.tailoringCM = cMark; 1880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cClass>0) { 
1882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxIndex = (int32_t)index[cClass-1]; 1883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 1885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxIndex=0; 1886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru decomp[0]=baseCh; 1888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for ( i=0; i<maxIndex ; i++ ) { 1889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru decomp[1] = cmLookup->cPoints[i]; 1890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru decomp[2]=0; 1891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru decompLen=2; 1892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len = unorm_normalize(decomp, decompLen, UNORM_NFC, 0, comp, 256, status); 1893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (len==1) { 1894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Save the current precomposed char and its class to find any 1895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // other combining mark combinations. 
1896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru precomp[precompLen].cp=comp[0]; 1897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru curClass = precomp[precompLen].cClass = 1898b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru index[unorm_getFCD16(fcdTrieIndex, decomp[1]) & 0xff]; 1899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru precompLen++; 1900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru replacedPos=0; 1901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (decompLen=0; decompLen< (int32_t)el->cSize; decompLen++) { 1902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru decomp[decompLen] = el->cPoints[decompLen]; 1903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (decomp[decompLen]==cMark) { 1904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru replacedPos = decompLen; // record the position for later use 1905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ( replacedPos != 0 ) { 1908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru decomp[replacedPos]=cmLookup->cPoints[i]; 1909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru decomp[decompLen] = 0; 1911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len = unorm_normalize(decomp, decompLen, UNORM_NFC, 0, comp, 256, status); 1912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru comp[len++] = decomp[decompLen++] = cMark; 1913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru comp[len] = decomp[decompLen] = 0; 1914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element.cPoints = decomp; 1915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element.cSize = decompLen; 1916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru element.noOfCEs = 0; 1917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element.prefix = el->prefixChars; 1918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element.prefixSize = 0; 1919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCAElements *prefix=(UCAElements *)uhash_get(t->prefixLookup, &element); 1921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element.cPoints = comp; 1922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element.cSize = len; 1923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element.prefix = el->prefixChars; 1924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element.prefixSize = 0; 1925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(prefix == NULL) { 1926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element.noOfCEs = 0; 1927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucol_setText(colEl, decomp, decompLen, status); 1928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((element.CEs[element.noOfCEs] = ucol_next(colEl, status)) != (uint32_t)UCOL_NULLORDER) { 1929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru element.noOfCEs++; 1930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_uca_setMapCE(t, &element, status); 1932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_uca_finalizeAddition(t, &element, status); 1933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This is a fix for tailoring contractions with accented 1936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // character at the end of contraction string. 
1937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((len>2) && 1938b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru (unorm_getFCD16(fcdTrieIndex, comp[len-2]) & 0xff00)==0) { 1939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_uca_addFCD4AccentedContractions(t, colEl, comp, len, &element, status); 1940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (precompLen >1) { 1943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c.compLen = len; 1944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c.decompLen = decompLen; 1945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c.precompLen = precompLen; 1946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c.cmPos = i; 1947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_uca_addMultiCMContractions(t, colEl, &c, &element, status); 1948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru precompLen = c.precompLen; 1949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1954c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CFUNC int32_t U_EXPORT2 1955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprv_uca_canonicalClosure(tempUCATable *t, 1956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UColTokenParser *src, 195727f654740f2a26ad62a5c155af9199af9e69b889claireho UnicodeSet *closed, 1958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *status) 1959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 
1960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru enumStruct context; 196127f654740f2a26ad62a5c155af9199af9e69b889claireho context.closed = closed; 1962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru context.noOfClosures = 0; 1963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCAElements el; 1964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UColToken *tok; 1965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t i = 0, j = 0; 1966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar baseChar, firstCM; 1967b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 fcdHighStart; 1968b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const uint16_t *fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status); 196950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho context.nfcImpl=Normalizer2Factory::getNFCImpl(*status); 1970b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(U_FAILURE(*status)) { 1971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCollator *tempColl = NULL; 1975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tempUCATable *tempTable = uprv_uca_cloneTempTable(t, status); 1976c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Check for null pointer 1977c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(*status)) { 1978c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 1979c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCATableHeader *tempData = uprv_uca_assembleTable(tempTable, status); 
1982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tempColl = ucol_initCollator(tempData, 0, t->UCA, status); 1983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ( tempTable->cmLookup != NULL ) { 1984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->cmLookup = tempTable->cmLookup; // copy over to t 1985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tempTable->cmLookup = NULL; 1986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_uca_closeTempTable(tempTable); 1988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_SUCCESS(*status)) { 1990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tempColl->ucaRules = NULL; 1991c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tempColl->actualLocale = NULL; 1992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tempColl->validLocale = NULL; 1993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tempColl->requestedLocale = NULL; 1994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tempColl->hasRealData = TRUE; 1995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tempColl->freeImageOnClose = TRUE; 1996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(tempData != 0) { 1997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(tempData); 1998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* produce canonical closure */ 2001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCollationElements* colEl = ucol_openElements(tempColl, NULL, 0, status); 2002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Check for null pointer 
2003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (U_FAILURE(*status)) { 2004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return 0; 2005c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 2006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru context.t = t; 2007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru context.tempColl = tempColl; 2008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru context.colEl = colEl; 2009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru context.status = status; 2010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_enumCharTypes(_enumCategoryRangeClosureCategory, &context); 2011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ( (src==NULL) || !src->buildCCTabFlag ) { 2013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucol_closeElements(colEl); 2014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucol_close(tempColl); 2015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return context.noOfClosures; // no extra contraction needed to add 2016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i < src->resultLen; i++) { 2019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru baseChar = firstCM= (UChar)0; 2020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tok = src->lh[i].first; 2021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (tok != NULL && U_SUCCESS(*status)) { 2022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el.prefix = el.prefixChars; 2023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el.cPoints = el.uchars; 2024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(tok->prefix != 0) { 
2025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el.prefixSize = tok->prefix>>24; 2026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(el.prefix, src->source + (tok->prefix & 0x00FFFFFF), el.prefixSize*sizeof(UChar)); 2027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el.cSize = (tok->source >> 24)-(tok->prefix>>24); 2029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(el.uchars, (tok->source & 0x00FFFFFF)+(tok->prefix>>24) + src->source, el.cSize*sizeof(UChar)); 2030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 2031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el.prefixSize = 0; 2032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *el.prefix = 0; 2033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru el.cSize = (tok->source >> 24); 2035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(el.uchars, (tok->source & 0x00FFFFFF) + src->source, el.cSize*sizeof(UChar)); 2036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(src->UCA != NULL) { 2038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(j = 0; j<el.cSize; j++) { 2039b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int16_t fcd = unorm_getFCD16(fcdTrieIndex, el.cPoints[j]); 2040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ( (fcd & 0xff) == 0 ) { 2041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru baseChar = el.cPoints[j]; // last base character 2042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru firstCM=0; // reset combining mark value 2043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 
2045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ( (baseChar!=0) && (firstCM==0) ) { 2046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru firstCM = el.cPoints[j]; // first combining mark 2047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ( (baseChar!= (UChar)0) && (firstCM != (UChar)0) ) { 2052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // find all the canonical rules 2053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_uca_addTailCanonicalClosures(t, colEl, baseChar, firstCM, &el, status); 2054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tok = tok->next; 2056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 2058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucol_closeElements(colEl); 2059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucol_close(tempColl); 206027f654740f2a26ad62a5c155af9199af9e69b889claireho 2061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return context.noOfClosures; 2062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 2063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_COLLATION */ 2065