/*
*******************************************************************************
*
*   Copyright (C) 2001-2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  ucaelems.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created 02/22/2001
*   created by: Vladimir Weinstein
*
*   This program reads the Fractional UCA table and generates
*   internal format for UCA table as well as inverse UCA table.
18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* It then writes binary files containing the data: ucadata.dat 19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* & invuca.dat 20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* date name comments 22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 03/02/2001 synwee added setMaxExpansion 23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 03/07/2001 synwee merged UCA's maxexpansion and tailoring's 24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h" 27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_COLLATION 29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uchar.h" 31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/unistr.h" 32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/ucoleitr.h" 33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/normlzr.h" 34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "normalizer2impl.h" 35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "ucol_elm.h" 36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "ucol_tok.h" 37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "ucol_cnt.h" 38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/caniter.h" 39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cmemory.h" 40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_USE 42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static uint32_t uprv_uca_processContraction(CntTable *contractions, UCAElements *element, uint32_t existingCE, UErrorCode *status); 44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CDECL_BEGIN 46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t U_CALLCONV 47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)prefixLookupHash(const UHashTok e) { 48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCAElements *element = (UCAElements *)e.pointer; 49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar buf[256]; 50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UHashTok key; 51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) key.pointer = buf; 52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(buf, element->cPoints, element->cSize*sizeof(UChar)); 53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) buf[element->cSize] = 0; 54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //key.pointer = element->cPoints; 55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //element->cPoints[element->cSize] = 0; 56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return uhash_hashUChars(key); 57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int8_t U_CALLCONV 60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)prefixLookupComp(const UHashTok e1, const UHashTok e2) { 61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCAElements *element1 = (UCAElements *)e1.pointer; 
62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCAElements *element2 = (UCAElements *)e2.pointer; 63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar buf1[256]; 65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UHashTok key1; 66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) key1.pointer = buf1; 67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(buf1, element1->cPoints, element1->cSize*sizeof(UChar)); 68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) buf1[element1->cSize] = 0; 69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar buf2[256]; 71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UHashTok key2; 72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) key2.pointer = buf2; 73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(buf2, element2->cPoints, element2->cSize*sizeof(UChar)); 74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) buf2[element2->cSize] = 0; 75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return uhash_compareUChars(key1, key2); 77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CDECL_END 79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t uprv_uca_addExpansion(ExpansionTable *expansions, uint32_t value, UErrorCode *status) { 81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*status)) { 82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) if(expansions->CEs == NULL) { 85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expansions->CEs = (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(uint32_t)); 86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* test for NULL */ 87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (expansions->CEs == NULL) { 88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expansions->size = INIT_EXP_TABLE_SIZE; 92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expansions->position = 0; 93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(expansions->position == expansions->size) { 96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t *newData = (uint32_t *)uprv_realloc(expansions->CEs, 2*expansions->size*sizeof(uint32_t)); 97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(newData == NULL) { 98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UCOL_DEBUG 99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr, "out of memory for expansions\n"); 100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; 103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expansions->CEs = newData; 105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expansions->size *= 2; 
106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expansions->CEs[expansions->position] = value; 109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return(expansions->position++); 110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI tempUCATable* U_EXPORT2 113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollator *UCA, UColCETags initTag, UColCETags supplementaryInitTag, UErrorCode *status) { 114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) MaxJamoExpansionTable *maxjet; 115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) MaxExpansionTable *maxet; 116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tempUCATable *t = (tempUCATable *)uprv_malloc(sizeof(tempUCATable)); 117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* test for NULL */ 118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (t == NULL) { 119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memset(t, 0, sizeof(tempUCATable)); 123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxet = (MaxExpansionTable *)uprv_malloc(sizeof(MaxExpansionTable)); 125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (maxet == NULL) { 126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto 
allocation_failure; 127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memset(maxet, 0, sizeof(MaxExpansionTable)); 129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->maxExpansions = maxet; 130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxjet = (MaxJamoExpansionTable *)uprv_malloc(sizeof(MaxJamoExpansionTable)); 132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (maxjet == NULL) { 133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto allocation_failure; 134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memset(maxjet, 0, sizeof(MaxJamoExpansionTable)); 136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->maxJamoExpansions = maxjet; 137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->image = image; 139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->options = opts; 140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->UCA = UCA; 142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->expansions = (ExpansionTable *)uprv_malloc(sizeof(ExpansionTable)); 143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* test for NULL */ 144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (t->expansions == NULL) { 145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto allocation_failure; 146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memset(t->expansions, 0, sizeof(ExpansionTable)); 148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->mapping = utrie_open(NULL, NULL, UCOL_ELM_TRIE_CAPACITY, 150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCOL_SPECIAL_FLAG | (initTag<<24), 151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCOL_SPECIAL_FLAG | (supplementaryInitTag << 24), 152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TRUE); // Do your own mallocs for the structure, array and have linear Latin 1 153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(*status)) { 154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto allocation_failure; 155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->prefixLookup = uhash_open(prefixLookupHash, prefixLookupComp, NULL, status); 157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(*status)) { 158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto allocation_failure; 159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uhash_setValueDeleter(t->prefixLookup, uhash_freeBlock); 161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->contractions = uprv_cnttab_open(t->mapping, status); 163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(*status)) { 164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* copy UCA's maxexpansion and merge as we go along */ 168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (UCA != NULL) { 169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles) /* adding an extra initial value for easier manipulation */ 170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxet->size = (int32_t)(UCA->lastEndExpansionCE - UCA->endExpansionCE) + 2; 171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxet->position = maxet->size - 1; 172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxet->endExpansionCE = 173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (uint32_t *)uprv_malloc(sizeof(uint32_t) * maxet->size); 174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* test for NULL */ 175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (maxet->endExpansionCE == NULL) { 176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto allocation_failure; 177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxet->expansionCESize = 179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (uint8_t *)uprv_malloc(sizeof(uint8_t) * maxet->size); 180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* test for NULL */ 181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (maxet->expansionCESize == NULL) { 182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto allocation_failure; 183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* initialized value */ 185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(maxet->endExpansionCE) = 0; 186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(maxet->expansionCESize) = 0; 187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(maxet->endExpansionCE + 1, UCA->endExpansionCE, 188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sizeof(uint32_t) * (maxet->size - 1)); 189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
uprv_memcpy(maxet->expansionCESize + 1, UCA->expansionCESize, 190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sizeof(uint8_t) * (maxet->size - 1)); 191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxet->size = 0; 194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxjet->endExpansionCE = NULL; 196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxjet->isV = NULL; 197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxjet->size = 0; 198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxjet->position = 0; 199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxjet->maxLSize = 1; 200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxjet->maxVSize = 1; 201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxjet->maxTSize = 1; 202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->unsafeCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE); 204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* test for NULL */ 205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (t->unsafeCP == NULL) { 206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto allocation_failure; 207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->contrEndCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE); 209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* test for NULL */ 210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (t->contrEndCP == NULL) { 211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto allocation_failure; 
212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memset(t->unsafeCP, 0, UCOL_UNSAFECP_TABLE_SIZE); 214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memset(t->contrEndCP, 0, UCOL_UNSAFECP_TABLE_SIZE); 215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->cmLookup = NULL; 216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return t; 217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)allocation_failure: 219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)cleanup: 221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_closeTempTable(t); 222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static tempUCATable* U_EXPORT2 226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)uprv_uca_cloneTempTable(tempUCATable *t, UErrorCode *status) { 227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*status)) { 228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tempUCATable *r = (tempUCATable *)uprv_malloc(sizeof(tempUCATable)); 232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* test for NULL */ 233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (r == NULL) { 234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = 
U_MEMORY_ALLOCATION_ERROR; 235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memset(r, 0, sizeof(tempUCATable)); 238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* mapping */ 240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(t->mapping != NULL) { 241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*r->mapping = ucmpe32_clone(t->mapping, status);*/ 242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->mapping = utrie_clone(NULL, t->mapping, NULL, 0); 243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // a hashing clone function would be very nice. We have none currently... 246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // However, we should be good, as closing should not produce any prefixed elements. 
247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->prefixLookup = NULL; // prefixes are not used in closing 248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* expansions */ 250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(t->expansions != NULL) { 251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->expansions = (ExpansionTable *)uprv_malloc(sizeof(ExpansionTable)); 252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* test for NULL */ 253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (r->expansions == NULL) { 254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->expansions->position = t->expansions->position; 258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->expansions->size = t->expansions->size; 259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(t->expansions->CEs != NULL) { 260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->expansions->CEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->expansions->size); 261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* test for NULL */ 262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (r->expansions->CEs == NULL) { 263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(r->expansions->CEs, t->expansions->CEs, sizeof(uint32_t)*t->expansions->position); 
267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->expansions->CEs = NULL; 269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(t->contractions != NULL) { 273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->contractions = uprv_cnttab_clone(t->contractions, status); 274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Check for cloning failure. 275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (r->contractions == NULL) { 276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->contractions->mapping = r->mapping; 280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(t->maxExpansions != NULL) { 283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->maxExpansions = (MaxExpansionTable *)uprv_malloc(sizeof(MaxExpansionTable)); 284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* test for NULL */ 285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (r->maxExpansions == NULL) { 286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
r->maxExpansions->size = t->maxExpansions->size; 290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->maxExpansions->position = t->maxExpansions->position; 291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(t->maxExpansions->endExpansionCE != NULL) { 292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->maxExpansions->endExpansionCE = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->maxExpansions->size); 293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* test for NULL */ 294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (r->maxExpansions->endExpansionCE == NULL) { 295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memset(r->maxExpansions->endExpansionCE, 0xDB, sizeof(uint32_t)*t->maxExpansions->size); 299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(r->maxExpansions->endExpansionCE, t->maxExpansions->endExpansionCE, t->maxExpansions->position*sizeof(uint32_t)); 300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->maxExpansions->endExpansionCE = NULL; 302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(t->maxExpansions->expansionCESize != NULL) { 304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->maxExpansions->expansionCESize = (uint8_t *)uprv_malloc(sizeof(uint8_t)*t->maxExpansions->size); 305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* test for NULL */ 306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (r->maxExpansions->expansionCESize == NULL) { 
307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memset(r->maxExpansions->expansionCESize, 0xDB, sizeof(uint8_t)*t->maxExpansions->size); 311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(r->maxExpansions->expansionCESize, t->maxExpansions->expansionCESize, t->maxExpansions->position*sizeof(uint8_t)); 312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->maxExpansions->expansionCESize = NULL; 314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(t->maxJamoExpansions != NULL) { 318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->maxJamoExpansions = (MaxJamoExpansionTable *)uprv_malloc(sizeof(MaxJamoExpansionTable)); 319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* test for NULL */ 320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (r->maxJamoExpansions == NULL) { 321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->maxJamoExpansions->size = t->maxJamoExpansions->size; 325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->maxJamoExpansions->position = t->maxJamoExpansions->position; 326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
r->maxJamoExpansions->maxLSize = t->maxJamoExpansions->maxLSize; 327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->maxJamoExpansions->maxVSize = t->maxJamoExpansions->maxVSize; 328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->maxJamoExpansions->maxTSize = t->maxJamoExpansions->maxTSize; 329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(t->maxJamoExpansions->size != 0) { 330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->maxJamoExpansions->endExpansionCE = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->maxJamoExpansions->size); 331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* test for NULL */ 332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (r->maxJamoExpansions->endExpansionCE == NULL) { 333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(r->maxJamoExpansions->endExpansionCE, t->maxJamoExpansions->endExpansionCE, t->maxJamoExpansions->position*sizeof(uint32_t)); 337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->maxJamoExpansions->isV = (UBool *)uprv_malloc(sizeof(UBool)*t->maxJamoExpansions->size); 338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* test for NULL */ 339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (r->maxJamoExpansions->isV == NULL) { 340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(r->maxJamoExpansions->isV, t->maxJamoExpansions->isV, 
t->maxJamoExpansions->position*sizeof(UBool)); 344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->maxJamoExpansions->endExpansionCE = NULL; 346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->maxJamoExpansions->isV = NULL; 347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(t->unsafeCP != NULL) { 351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->unsafeCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE); 352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* test for NULL */ 353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (r->unsafeCP == NULL) { 354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) goto cleanup; 356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(r->unsafeCP, t->unsafeCP, UCOL_UNSAFECP_TABLE_SIZE); 358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(t->contrEndCP != NULL) { 361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->contrEndCP = (uint8_t *)uprv_malloc(UCOL_UNSAFECP_TABLE_SIZE); 362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* test for NULL */ 363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (r->contrEndCP == NULL) { 364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles) goto cleanup; 366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(r->contrEndCP, t->contrEndCP, UCOL_UNSAFECP_TABLE_SIZE); 368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->UCA = t->UCA; 371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->image = t->image; 372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) r->options = t->options; 373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return r; 375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)cleanup: 376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_closeTempTable(t); 377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI void U_EXPORT2 382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)uprv_uca_closeTempTable(tempUCATable *t) { 383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(t != NULL) { 384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (t->expansions != NULL) { 385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(t->expansions->CEs); 386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(t->expansions); 387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(t->contractions != NULL) { 389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
uprv_cnttab_close(t->contractions); 390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (t->mapping != NULL) { 392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie_close(t->mapping); 393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(t->prefixLookup != NULL) { 396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uhash_close(t->prefixLookup); 397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (t->maxExpansions != NULL) { 400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(t->maxExpansions->endExpansionCE); 401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(t->maxExpansions->expansionCESize); 402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(t->maxExpansions); 403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (t->maxJamoExpansions->size > 0) { 406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(t->maxJamoExpansions->endExpansionCE); 407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(t->maxJamoExpansions->isV); 408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(t->maxJamoExpansions); 410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(t->unsafeCP); 412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
uprv_free(t->contrEndCP); 413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (t->cmLookup != NULL) { 415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(t->cmLookup->cPoints); 416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(t->cmLookup); 417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(t); 420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Looks for the maximum length of all expansion sequences ending with the same 425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* collation element. The size required for maxexpansion and maxsize is 426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* returned if the arrays are too small. 427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @param endexpansion the last expansion collation element to be added 428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @param expansionsize size of the expansion 429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @param maxexpansion data structure to store the maximum expansion data. 430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @param status error status 431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @returns size of the maxexpansion and maxsize used. 
432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int uprv_uca_setMaxExpansion(uint32_t endexpansion, 434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t expansionsize, 435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) MaxExpansionTable *maxexpansion, 436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *status) 437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (maxexpansion->size == 0) { 439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* we'll always make the first element 0, for easier manipulation */ 440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxexpansion->endExpansionCE = 441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(int32_t)); 442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* test for NULL */ 443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (maxexpansion->endExpansionCE == NULL) { 444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(maxexpansion->endExpansionCE) = 0; 448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxexpansion->expansionCESize = 449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (uint8_t *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(uint8_t)); 450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* test for NULL */; 451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (maxexpansion->expansionCESize == NULL) { 452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(maxexpansion->expansionCESize) = 0; 456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxexpansion->size = INIT_EXP_TABLE_SIZE; 457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxexpansion->position = 0; 458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (maxexpansion->position + 1 == maxexpansion->size) { 461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t *neweece = (uint32_t *)uprv_realloc(maxexpansion->endExpansionCE, 462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2 * maxexpansion->size * sizeof(uint32_t)); 463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (neweece == NULL) { 464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxexpansion->endExpansionCE = neweece; 468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t *neweces = (uint8_t *)uprv_realloc(maxexpansion->expansionCESize, 470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2 * maxexpansion->size * sizeof(uint8_t)); 471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (neweces == NULL) { 472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles) return 0; 474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxexpansion->expansionCESize = neweces; 476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxexpansion->size *= 2; 477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t *pendexpansionce = maxexpansion->endExpansionCE; 480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t *pexpansionsize = maxexpansion->expansionCESize; 481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int pos = maxexpansion->position; 482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t *start = pendexpansionce; 484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t *limit = pendexpansionce + pos; 485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* using binary search to determine if last expansion element is 487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) already in the array */ 488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t *mid; 489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int result = -1; 490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (start < limit - 1) { 491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) mid = start + ((limit - start) >> 1); 492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (endexpansion <= *mid) { 493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) limit = mid; 494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 
496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start = mid; 497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (*start == endexpansion) { 501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = (int)(start - pendexpansionce); 502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else if (*limit == endexpansion) { 504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result = (int)(limit - pendexpansionce); 505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (result > -1) { 508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* found the ce in expansion, we'll just modify the size if it is 509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) smaller */ 510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t *currentsize = pexpansionsize + result; 511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (*currentsize < expansionsize) { 512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *currentsize = expansionsize; 513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* we'll need to squeeze the value into the array. 517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) initial implementation. 
*/ 518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* shifting the subarray down by 1 */ 519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int shiftsize = (int)((pendexpansionce + pos) - start); 520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t *shiftpos = start + 1; 521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t *sizeshiftpos = pexpansionsize + (shiftpos - pendexpansionce); 522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* okay need to rearrange the array into sorted order */ 524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (shiftsize == 0 /*|| *(pendexpansionce + pos) < endexpansion*/) { /* the commented part is actually both redundant and dangerous */ 525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(pendexpansionce + pos + 1) = endexpansion; 526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(pexpansionsize + pos + 1) = expansionsize; 527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memmove(shiftpos + 1, shiftpos, shiftsize * sizeof(int32_t)); 530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memmove(sizeshiftpos + 1, sizeshiftpos, 531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) shiftsize * sizeof(uint8_t)); 532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *shiftpos = endexpansion; 533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *sizeshiftpos = expansionsize; 534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxexpansion->position ++; 536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UCOL_DEBUG 538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int temp; 539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool found = FALSE; 540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (temp = 0; temp < maxexpansion->position; temp ++) { 541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (pendexpansionce[temp] >= pendexpansionce[temp + 1]) { 542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr, "expansions %d\n", temp); 543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (pendexpansionce[temp] == endexpansion) { 545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) found =TRUE; 546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (pexpansionsize[temp] < expansionsize) { 547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr, "expansions size %d\n", temp); 548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (pendexpansionce[temp] == endexpansion) { 552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) found =TRUE; 553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (pexpansionsize[temp] < expansionsize) { 554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr, "expansions size %d\n", temp); 555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (!found) 558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr, "expansion not found 
%d\n", temp); 559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return maxexpansion->position; 563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Sets the maximum length of all jamo expansion sequences ending with the same 567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* collation element. The size required for maxexpansion and maxsize is 568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* returned if the arrays are too small. 569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @param ch the jamo codepoint 570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @param endexpansion the last expansion collation element to be added 571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @param expansionsize size of the expansion 572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @param maxexpansion data structure to store the maximum expansion data. 573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @param status error status 574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* @returns size of the maxexpansion and maxsize used. 
575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int uprv_uca_setMaxJamoExpansion(UChar ch, 577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t endexpansion, 578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t expansionsize, 579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) MaxJamoExpansionTable *maxexpansion, 580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *status) 581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool isV = TRUE; 583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (((uint32_t)ch - 0x1100) <= (0x1112 - 0x1100)) { 584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* determines L for Jamo, doesn't need to store this since it is never 585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) at the end of a expansion */ 586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (maxexpansion->maxLSize < expansionsize) { 587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxexpansion->maxLSize = expansionsize; 588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return maxexpansion->position; 590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (((uint32_t)ch - 0x1161) <= (0x1175 - 0x1161)) { 593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* determines V for Jamo */ 594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (maxexpansion->maxVSize < expansionsize) { 595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxexpansion->maxVSize = expansionsize; 
596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (((uint32_t)ch - 0x11A8) <= (0x11C2 - 0x11A8)) { 600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) isV = FALSE; 601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* determines T for Jamo */ 602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (maxexpansion->maxTSize < expansionsize) { 603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxexpansion->maxTSize = expansionsize; 604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (maxexpansion->size == 0) { 608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* we'll always make the first element 0, for easier manipulation */ 609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxexpansion->endExpansionCE = 610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(uint32_t)); 611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* test for NULL */; 612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (maxexpansion->endExpansionCE == NULL) { 613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(maxexpansion->endExpansionCE) = 0; 617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxexpansion->isV = 
618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (UBool *)uprv_malloc(INIT_EXP_TABLE_SIZE * sizeof(UBool)); 619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* test for NULL */; 620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (maxexpansion->isV == NULL) { 621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(maxexpansion->endExpansionCE); 623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxexpansion->endExpansionCE = NULL; 624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(maxexpansion->isV) = 0; 627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxexpansion->size = INIT_EXP_TABLE_SIZE; 628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxexpansion->position = 0; 629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (maxexpansion->position + 1 == maxexpansion->size) { 632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxexpansion->size *= 2; 633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxexpansion->endExpansionCE = (uint32_t *)uprv_realloc(maxexpansion->endExpansionCE, 634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxexpansion->size * sizeof(uint32_t)); 635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (maxexpansion->endExpansionCE == NULL) { 636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UCOL_DEBUG 637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr, "out of memory for maxExpansions\n"); 
638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxexpansion->isV = (UBool *)uprv_realloc(maxexpansion->isV, 643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxexpansion->size * sizeof(UBool)); 644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (maxexpansion->isV == NULL) { 645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UCOL_DEBUG 646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr, "out of memory for maxExpansions\n"); 647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(maxexpansion->endExpansionCE); 650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxexpansion->endExpansionCE = NULL; 651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t *pendexpansionce = maxexpansion->endExpansionCE; 656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int pos = maxexpansion->position; 657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (pos > 0) { 659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pos --; 660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles) if (*(pendexpansionce + pos) == endexpansion) { 661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return maxexpansion->position; 662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(pendexpansionce + maxexpansion->position) = endexpansion; 666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(maxexpansion->isV + maxexpansion->position) = isV; 667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxexpansion->position ++; 668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return maxexpansion->position; 670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void ContrEndCPSet(uint8_t *table, UChar c) { 674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t hash; 675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t *htByte; 676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) hash = c; 678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) { 679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256; 680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) htByte = &table[hash>>3]; 682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *htByte |= (1 << (hash & 7)); 683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 
684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void unsafeCPSet(uint8_t *table, UChar c) { 687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t hash; 688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t *htByte; 689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) hash = c; 691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (hash >= UCOL_UNSAFECP_TABLE_SIZE*8) { 692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (hash >= 0xd800 && hash <= 0xf8ff) { 693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Part of a surrogate, or in private use area. */ 694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* These don't go in the table */ 695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) hash = (hash & UCOL_UNSAFECP_TABLE_MASK) + 256; 698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) htByte = &table[hash>>3]; 700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *htByte |= (1 << (hash & 7)); 701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)uprv_uca_createCMTable(tempUCATable *t, int32_t noOfCM, UErrorCode *status) { 705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->cmLookup = (CombinClassTable *)uprv_malloc(sizeof(CombinClassTable)); 
706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (t->cmLookup==NULL) { 707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->cmLookup->cPoints=(UChar *)uprv_malloc(noOfCM*sizeof(UChar)); 711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (t->cmLookup->cPoints ==NULL) { 712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(t->cmLookup); 713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->cmLookup = NULL; 714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->cmLookup->size=noOfCM; 719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memset(t->cmLookup->index, 0, sizeof(t->cmLookup->index)); 720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)uprv_uca_copyCMTable(tempUCATable *t, UChar *cm, uint16_t *index) { 726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t count=0; 727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (int32_t i=0; i<256; ++i) { 
729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (index[i]>0) { 730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // cPoints is ordered by combining class value. 731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(t->cmLookup->cPoints+count, cm+(i<<8), index[i]*sizeof(UChar)); 732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) count += index[i]; 733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->cmLookup->index[i]=count; 735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 1. to the UnsafeCP hash table, add all chars with combining class != 0 */ 740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 2. build combining marks table for all chars with combining class != 0 */ 741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void uprv_uca_unsafeCPAddCCNZ(tempUCATable *t, UErrorCode *status) { 742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar c; 744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint16_t fcd; // Hi byte is lead combining class. 745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // lo byte is trailing combing class. 
746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint16_t *fcdTrieIndex; 747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 fcdHighStart; 748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool buildCMTable = (t->cmLookup==NULL); // flag for building combining class table 749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar *cm=NULL; 750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint16_t index[256]; 751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t count=0; 752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status); 753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(*status)) { 754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (buildCMTable) { 758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (cm==NULL) { 759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cm = (UChar *)uprv_malloc(sizeof(UChar)*UCOL_MAX_CM_TAB); 760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (cm==NULL) { 761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memset(index, 0, sizeof(index)); 766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (c=0; c<0xffff; c++) { 768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) fcd = unorm_getFCD16(fcdTrieIndex, c); 769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (fcd >= 0x100 || // if the leading combining class(c) > 0 || 770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (UTF_IS_LEAD(c) && fcd != 0)) {// c is a leading surrogate with some FCD data 771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (buildCMTable) { 772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t cClass = fcd & 0xff; 773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //uint32_t temp=(cClass<<8)+index[cClass]; 774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cm[(cClass<<8)+index[cClass]] = c; // 775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) index[cClass]++; 776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) count++; 777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) unsafeCPSet(t->unsafeCP, c); 779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // copy to cm table 783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (buildCMTable) { 784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_createCMTable(t, count, status); 785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*status)) { 786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (cm!=NULL) { 787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(cm); 788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 
791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_copyCMTable(t, cm, index); 792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(t->prefixLookup != NULL) { 795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i = -1; 796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UHashElement *e = NULL; 797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCAElements *element = NULL; 798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar NFCbuf[256]; 799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t NFCbufLen = 0; 800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while((e = uhash_nextElement(t->prefixLookup, &i)) != NULL) { 801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element = (UCAElements *)e->value.pointer; 802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // codepoints here are in the NFD form. We need to add the 803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // first code point of the NFC form to unsafe, because 804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // strcoll needs to backup over them. 
805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NFCbufLen = unorm_normalize(element->cPoints, element->cSize, UNORM_NFC, 0, 806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NFCbuf, 256, status); 807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) unsafeCPSet(t->unsafeCP, NFCbuf[0]); 808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (cm!=NULL) { 812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(cm); 813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static uint32_t uprv_uca_addPrefix(tempUCATable *t, uint32_t CE, 817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCAElements *element, UErrorCode *status) 818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // currently the longest prefix we're supporting in Japanese is two characters 820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // long. Although this table could quite easily mimic complete contraction stuff 821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // there is no good reason to make a general solution, as it would require some 822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // error prone messing. 
823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CntTable *contractions = t->contractions; 824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 cp; 825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t cpsize = 0; 826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar *oldCP = element->cPoints; 827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t oldCPSize = element->cSize; 828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) contractions->currentTag = SPEC_PROC_TAG; 831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // here, we will normalize & add prefix to the table. 833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t j = 0; 834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UCOL_DEBUG 835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(j=0; j<element->cSize; j++) { 836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stdout, "CP: %04X ", element->cPoints[j]); 837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stdout, "El: %08X Pref: ", CE); 839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(j=0; j<element->prefixSize; j++) { 840f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stdout, "%04X ", element->prefix[j]); 841f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 842f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stdout, "%08X ", element->mapCE); 843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (j = 1; j<element->prefixSize; j++) { /* First add NFD prefix chars to unsafe CP hash table */ 846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Unless it is a trail surrogate, which is handled algoritmically and 847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // shouldn't take up space in the table. 848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!(UTF_IS_TRAIL(element->prefix[j]))) { 849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) unsafeCPSet(t->unsafeCP, element->prefix[j]); 850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar tempPrefix = 0; 854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(j = 0; j < /*nfcSize*/element->prefixSize/2; j++) { // prefixes are going to be looked up backwards 856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // therefore, we will promptly reverse the prefix buffer... 
857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tempPrefix = *(/*nfcBuffer*/element->prefix+element->prefixSize-j-1); 858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(/*nfcBuffer*/element->prefix+element->prefixSize-j-1) = element->prefix[j]; 859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element->prefix[j] = tempPrefix; 860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UCOL_DEBUG 863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stdout, "Reversed: "); 864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(j=0; j<element->prefixSize; j++) { 865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stdout, "%04X ", element->prefix[j]); 866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stdout, "%08X\n", element->mapCE); 868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // the first codepoint is also unsafe, as it forms a 'contraction' with the prefix 871f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!(UTF_IS_TRAIL(element->cPoints[0]))) { 872f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) unsafeCPSet(t->unsafeCP, element->cPoints[0]); 873f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 874f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 875f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Maybe we need this... To handle prefixes completely in the forward direction... 
876f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //if(element->cSize == 1) { 877f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // if(!(UTF_IS_TRAIL(element->cPoints[0]))) { 878f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // ContrEndCPSet(t->contrEndCP, element->cPoints[0]); 879f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // } 880f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //} 881f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 882f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element->cPoints = element->prefix; 883f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element->cSize = element->prefixSize; 884f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 885f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Add the last char of the contraction to the contraction-end hash table. 886f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // unless it is a trail surrogate, which is handled algorithmically and 887f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // shouldn't be in the table 888f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!(UTF_IS_TRAIL(element->cPoints[element->cSize -1]))) { 889f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ContrEndCPSet(t->contrEndCP, element->cPoints[element->cSize -1]); 890f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 891f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 892f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // First we need to check if contractions starts with a surrogate 893f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTF_NEXT_CHAR(element->cPoints, cpsize, element->cSize, cp); 894f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 895f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // If there are any Jamos in the contraction, we should 
turn on special 896f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // processing for Jamos 897f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(UCOL_ISJAMO(element->prefix[0])) { 898f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->image->jamoSpecial = TRUE; 899f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 900f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* then we need to deal with it */ 901f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* we could aready have something in table - or we might not */ 902f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 903f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!isPrefix(CE)) { 904f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* if it wasn't contraction, we wouldn't end up here*/ 905f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t firstContractionOffset = 0; 906f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) firstContractionOffset = uprv_cnttab_addContraction(contractions, UPRV_CNTTAB_NEWELEMENT, 0, CE, status); 907f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status); 908f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_cnttab_addContraction(contractions, firstContractionOffset, *element->prefix, newCE, status); 909f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_cnttab_addContraction(contractions, firstContractionOffset, 0xFFFF, CE, status); 910f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CE = constructContractCE(SPEC_PROC_TAG, firstContractionOffset); 911f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { /* we are adding to existing contraction */ 912f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* there were already some elements in the table, so we need to add a 
new contraction */ 913f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Two things can happen here: either the codepoint is already in the table, or it is not */ 914f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t position = uprv_cnttab_findCP(contractions, CE, *element->prefix, status); 915f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(position > 0) { /* if it is we just continue down the chain */ 916f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t eCE = uprv_cnttab_getCE(contractions, CE, position, status); 917f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t newCE = uprv_uca_processContraction(contractions, element, eCE, status); 918f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_cnttab_setContraction(contractions, CE, position, *(element->prefix), newCE, status); 919f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { /* if it isn't, we will have to create a new sequence */ 920f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status); 921f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_cnttab_insertContraction(contractions, CE, *(element->prefix), element->mapCE, status); 922f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 923f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 924f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 925f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element->cPoints = oldCP; 926f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element->cSize = oldCPSize; 927f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 928f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return CE; 929f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 930f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
931f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Note regarding surrogate handling: We are interested only in the single 932f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// or leading surrogates in a contraction. If a surrogate is somewhere else 933f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// in the contraction, it is going to be handled as a pair of code units, 934f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// as it doesn't affect the performance AND handling surrogates specially 935f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// would complicate code way too much. 936f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static uint32_t uprv_uca_addContraction(tempUCATable *t, uint32_t CE, 937f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCAElements *element, UErrorCode *status) 938f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 939f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CntTable *contractions = t->contractions; 940f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 cp; 941f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t cpsize = 0; 942f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 943f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) contractions->currentTag = CONTRACTION_TAG; 944f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 945f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // First we need to check if contractions starts with a surrogate 946f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTF_NEXT_CHAR(element->cPoints, cpsize, element->cSize, cp); 947f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 948f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(cpsize<element->cSize) { // This is a real contraction, if there are other characters after the first 
949f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t j = 0; 950f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (j=1; j<element->cSize; j++) { /* First add contraction chars to unsafe CP hash table */ 951f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Unless it is a trail surrogate, which is handled algoritmically and 952f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // shouldn't take up space in the table. 953f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!(UTF_IS_TRAIL(element->cPoints[j]))) { 954f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) unsafeCPSet(t->unsafeCP, element->cPoints[j]); 955f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 956f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 957f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Add the last char of the contraction to the contraction-end hash table. 958f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // unless it is a trail surrogate, which is handled algorithmically and 959f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // shouldn't be in the table 960f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!(UTF_IS_TRAIL(element->cPoints[element->cSize -1]))) { 961f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ContrEndCPSet(t->contrEndCP, element->cPoints[element->cSize -1]); 962f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 963f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 964f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // If there are any Jamos in the contraction, we should turn on special 965f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // processing for Jamos 966f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(UCOL_ISJAMO(element->cPoints[0])) { 967f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
t->image->jamoSpecial = TRUE; 968f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 969f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* then we need to deal with it */ 970f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* we could aready have something in table - or we might not */ 971f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element->cPoints+=cpsize; 972f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element->cSize-=cpsize; 973f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!isContraction(CE)) { 974f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* if it wasn't contraction, we wouldn't end up here*/ 975f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t firstContractionOffset = 0; 976f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) firstContractionOffset = uprv_cnttab_addContraction(contractions, UPRV_CNTTAB_NEWELEMENT, 0, CE, status); 977f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status); 978f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_cnttab_addContraction(contractions, firstContractionOffset, *element->cPoints, newCE, status); 979f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_cnttab_addContraction(contractions, firstContractionOffset, 0xFFFF, CE, status); 980f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CE = constructContractCE(CONTRACTION_TAG, firstContractionOffset); 981f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { /* we are adding to existing contraction */ 982f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* there were already some elements in the table, so we need to add a new contraction */ 983f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Two things can happen here: either the codepoint is already in 
the table, or it is not */ 984f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t position = uprv_cnttab_findCP(contractions, CE, *element->cPoints, status); 985f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(position > 0) { /* if it is we just continue down the chain */ 986f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t eCE = uprv_cnttab_getCE(contractions, CE, position, status); 987f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t newCE = uprv_uca_processContraction(contractions, element, eCE, status); 988f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_cnttab_setContraction(contractions, CE, position, *(element->cPoints), newCE, status); 989f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { /* if it isn't, we will have to create a new sequence */ 990f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status); 991f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_cnttab_insertContraction(contractions, CE, *(element->cPoints), newCE, status); 992f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 993f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 994f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element->cPoints-=cpsize; 995f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element->cSize+=cpsize; 996f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*ucmpe32_set(t->mapping, cp, CE);*/ 997f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie_set32(t->mapping, cp, CE); 998f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(!isContraction(CE)) { /* this is just a surrogate, and there is no contraction */ 999f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*ucmpe32_set(t->mapping, cp, element->mapCE);*/ 
1000f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie_set32(t->mapping, cp, element->mapCE); 1001f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { /* fill out the first stage of the contraction with the surrogate CE */ 1002f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_cnttab_changeContraction(contractions, CE, 0, element->mapCE, status); 1003f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_cnttab_changeContraction(contractions, CE, 0xFFFF, element->mapCE, status); 1004f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1005f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return CE; 1006f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1007f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1008f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1009f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static uint32_t uprv_uca_processContraction(CntTable *contractions, UCAElements *element, uint32_t existingCE, UErrorCode *status) { 1010f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t firstContractionOffset = 0; 1011f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // uint32_t contractionElement = UCOL_NOT_FOUND; 1012f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1013f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*status)) { 1014f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return UCOL_NOT_FOUND; 1015f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1016f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1017f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* end of recursion */ 1018f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(element->cSize == 1) { 1019f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(isCntTableElement(existingCE) && 
((UColCETags)getCETag(existingCE) == contractions->currentTag)) { 1020f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_cnttab_changeContraction(contractions, existingCE, 0, element->mapCE, status); 1021f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_cnttab_changeContraction(contractions, existingCE, 0xFFFF, element->mapCE, status); 1022f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return existingCE; 1023f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1024f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return element->mapCE; /*can't do just that. existingCe might be a contraction, meaning that we need to do another step */ 1025f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1026f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1027f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1028f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* this recursion currently feeds on the only element we have... 
We will have to copy it in order to accomodate */ 1029f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* for both backward and forward cycles */ 1030f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1031f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* we encountered either an empty space or a non-contraction element */ 1032f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* this means we are constructing a new contraction sequence */ 1033f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element->cPoints++; 1034f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element->cSize--; 1035f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!isCntTableElement(existingCE)) { 1036f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* if it wasn't contraction, we wouldn't end up here*/ 1037f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) firstContractionOffset = uprv_cnttab_addContraction(contractions, UPRV_CNTTAB_NEWELEMENT, 0, existingCE, status); 1038f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status); 1039f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_cnttab_addContraction(contractions, firstContractionOffset, *element->cPoints, newCE, status); 1040f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_cnttab_addContraction(contractions, firstContractionOffset, 0xFFFF, existingCE, status); 1041f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) existingCE = constructContractCE(contractions->currentTag, firstContractionOffset); 1042f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { /* we are adding to existing contraction */ 1043f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* there were already some elements in the table, so we need to add a new contraction */ 
1044f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Two things can happen here: either the codepoint is already in the table, or it is not */ 1045f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t position = uprv_cnttab_findCP(contractions, existingCE, *element->cPoints, status); 1046f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(position > 0) { /* if it is we just continue down the chain */ 1047f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t eCE = uprv_cnttab_getCE(contractions, existingCE, position, status); 1048f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t newCE = uprv_uca_processContraction(contractions, element, eCE, status); 1049f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_cnttab_setContraction(contractions, existingCE, position, *(element->cPoints), newCE, status); 1050f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { /* if it isn't, we will have to create a new sequence */ 1051f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t newCE = uprv_uca_processContraction(contractions, element, UCOL_NOT_FOUND, status); 1052f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_cnttab_insertContraction(contractions, existingCE, *(element->cPoints), newCE, status); 1053f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1054f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1055f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element->cPoints--; 1056f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element->cSize++; 1057f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return existingCE; 1058f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1059f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1060f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static uint32_t 
uprv_uca_finalizeAddition(tempUCATable *t, UCAElements *element, UErrorCode *status) {
    /*
     * Writes element->mapCE into the temporary table for element->cPoints.
     *
     * t        table under construction (mapping trie, contraction table, unsafe set)
     * element  code unit sequence (cPoints/cSize) together with its mapCE
     * status   ICU error code; nothing is done if it already indicates failure
     *
     * Returns UCOL_NOT_FOUND if *status is a failure on entry. For a sequence of
     * more than one code unit it returns the result of uprv_uca_addContraction().
     * For a single code unit it returns the value that was stored in the trie
     * BEFORE this call (UCOL_NOT_FOUND if there was none), or 0 if allocating the
     * temporary element fails.
     */
    uint32_t CE = UCOL_NOT_FOUND;

    /* Follow the ICU error-code convention used by the sibling functions in this
       file: after an upstream failure, do not touch the trie or the unsafe set
       and do not walk cPoints/cSize, which may no longer be consistent. */
    if(U_FAILURE(*status)) {
        return CE;
    }

    // A completely ignorable element is added to the unsafe table, so that
    // backward iteration will skip over it when treating contractions.
    if(element->mapCE == 0) {
        for(uint32_t i = 0; i < element->cSize; i++) {
            if(!UTF_IS_TRAIL(element->cPoints[i])) {
                unsafeCPSet(t->unsafeCP, element->cPoints[i]);
            }
        }
    }

    if(element->cSize > 1) { /* we're adding a contraction */
        uint32_t offset = 0;
        UChar32 cp;

        /* the contraction is keyed by its first code point */
        UTF_NEXT_CHAR(element->cPoints, offset, element->cSize, cp);
        CE = utrie_get32(t->mapping, cp, NULL);

        CE = uprv_uca_addContraction(t, CE, element, status);
    } else { /* easy case: a single code unit */
        CE = utrie_get32(t->mapping, element->cPoints[0], NULL);

        if(CE == UCOL_NOT_FOUND) {
            /* nothing stored yet, just set the value */
            utrie_set32(t->mapping, element->cPoints[0], element->mapCE);
        } else if(isCntTableElement(CE)) {
            /* adding a non contraction element (thai, expansion, single) to an already existing contraction */
            if(!isPrefix(element->mapCE)) { // we cannot reenter prefix elements - as we are going to create a dead loop
                // Only expansions and regular CEs can go here... Contractions will never happen in this place
                uprv_cnttab_setContraction(t->contractions, CE, 0, 0, element->mapCE, status);
                /* This has to change the CE at the end of contraction REDO!*/
                uprv_cnttab_changeLastCE(t->contractions, CE, element->mapCE, status);
            }
        } else {
            /* overwrite the previously stored plain value */
            utrie_set32(t->mapping, element->cPoints[0], element->mapCE);
            if ((element->prefixSize!=0) && (!isSpecial(CE) || (getCETag(CE)!=IMPLICIT_TAG))) {
                /* The element carries a prefix and the old value was not an
                   implicit one: re-add the old value as a prefix-less element for
                   the same code unit. Only the fields read on that path are set. */
                UCAElements *origElem = (UCAElements *)uprv_malloc(sizeof(UCAElements));
                if (origElem == NULL) {
                    *status = U_MEMORY_ALLOCATION_ERROR;
                    return 0;
                }
                /* copy the original UCA value */
                origElem->prefixSize = 0;
                origElem->prefix = NULL;
                origElem->cPoints = origElem->uchars;
                origElem->cPoints[0] = element->cPoints[0];
                origElem->cSize = 1;
                origElem->CEs[0]=CE;
                origElem->mapCE=CE;
                origElem->noOfCEs=1;
                uprv_uca_finalizeAddition(t, origElem, status);
                uprv_free(origElem);
            }
#ifdef UCOL_DEBUG
            fprintf(stderr, "Warning - trying to overwrite existing data %08X for cp %04X with %08X\n", CE, element->cPoints[0], element->CEs[0]);
#endif
        }
    }
    return CE;
}

Coles)/* This adds a read element, while testing for existence */ 1130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI uint32_t U_EXPORT2 1131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)uprv_uca_addAnElement(tempUCATable *t, UCAElements *element, UErrorCode *status) { 1132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U_NAMESPACE_USE 1133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ExpansionTable *expansions = t->expansions; 1135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t i = 1; 1137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t expansion = 0; 1138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t CE; 1139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*status)) { 1141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0xFFFF; 1142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element->mapCE = 0; // clear mapCE so that we can catch expansions 1145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(element->noOfCEs == 1) { 1147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element->mapCE = element->CEs[0]; 1148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* ICU 2.1 long primaries */ 1150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* unfortunately, it looks like we have to look for a long primary here */ 
1151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* since in canonical closure we are going to hit some long primaries from */ 1152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* the first phase, and they will come back as continuations/expansions */ 1153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* destroying the effect of the previous opitimization */ 1154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* A long primary is a three byte primary with starting secondaries and tertiaries */ 1155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* It can appear in long runs of only primary differences (like east Asian tailorings) */ 1156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* also, it should not be an expansion, as expansions would break with this */ 1157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // This part came in from ucol_bld.cpp 1158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //if(tok->expansion == 0 1159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //&& noOfBytes[0] == 3 && noOfBytes[1] == 1 && noOfBytes[2] == 1 1160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //&& CEparts[1] == (UCOL_BYTE_COMMON << 24) && CEparts[2] == (UCOL_BYTE_COMMON << 24)) { 1161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* we will construct a special CE that will go unchanged to the table */ 1162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(element->noOfCEs == 2 // a two CE expansion 1163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) && isContinuation(element->CEs[1]) // which is a continuation 1164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) && (element->CEs[1] & (~(0xFF << 24 | UCOL_CONTINUATION_MARKER))) == 0 // that has only primaries in continuation, 1165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) && (((element->CEs[0]>>8) & 
0xFF) == UCOL_BYTE_COMMON) // a common secondary 1166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) && ((element->CEs[0] & 0xFF) == UCOL_BYTE_COMMON) // and a common tertiary 1167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ) 1168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 1169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UCOL_DEBUG 1170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stdout, "Long primary %04X\n", element->cPoints[0]); 1171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 1172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element->mapCE = UCOL_SPECIAL_FLAG | (LONG_PRIMARY_TAG<<24) // a long primary special 1173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) | ((element->CEs[0]>>8) & 0xFFFF00) // first and second byte of primary 1174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) | ((element->CEs[1]>>24) & 0xFF); // third byte of primary 1175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 1177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (EXPANSION_TAG<<UCOL_TAG_SHIFT) 1178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) | (((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4) 1179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) & 0xFFFFF0)); 1180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i = 1; i<element->noOfCEs; i++) { 1182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_addExpansion(expansions, element->CEs[i], status); 1183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(element->noOfCEs <= 0xF) { 
1185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expansion |= element->noOfCEs; 1186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_addExpansion(expansions, 0, status); 1188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element->mapCE = expansion; 1190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_setMaxExpansion(element->CEs[element->noOfCEs - 1], 1191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (uint8_t)element->noOfCEs, 1192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->maxExpansions, 1193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status); 1194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(UCOL_ISJAMO(element->cPoints[0])) { 1195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->image->jamoSpecial = TRUE; 1196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_setMaxJamoExpansion(element->cPoints[0], 1197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element->CEs[element->noOfCEs - 1], 1198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (uint8_t)element->noOfCEs, 1199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->maxJamoExpansions, 1200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status); 1201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(*status)) { 1203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 1204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles) 1208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // We treat digits differently - they are "uber special" and should be 1209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // processed differently if numeric collation is on. 1210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 uniChar = 0; 1211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //printElement(element); 1212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((element->cSize == 2) && U16_IS_LEAD(element->cPoints[0])){ 1213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uniChar = U16_GET_SUPPLEMENTARY(element->cPoints[0], element->cPoints[1]); 1214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if (element->cSize == 1){ 1215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uniChar = element->cPoints[0]; 1216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Here, we either have one normal CE OR mapCE is set. Therefore, we stuff only 1219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // one element to the expansion buffer. When we encounter a digit and we don't 1220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // do numeric collation, we will just pick the CE we have and break out of case 1221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // (see ucol.cpp ucol_prv_getSpecialCE && ucol_prv_getSpecialPrevCE). If we picked 1222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // a special, further processing will occur. If it's a simple CE, we'll return due 1223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // to how the loop is constructed. 
1224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (uniChar != 0 && u_isdigit(uniChar)){ 1225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (DIGIT_TAG<<UCOL_TAG_SHIFT) | 1); // prepare the element 1226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(element->mapCE) { // if there is an expansion, we'll pick it here 1227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expansion |= ((uprv_uca_addExpansion(expansions, element->mapCE, status)+(headersize>>2))<<4); 1228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expansion |= ((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4); 1230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element->mapCE = expansion; 1232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Need to go back to the beginning of the digit string if in the middle! 1234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(uniChar <= 0xFFFF) { // supplementaries are always unsafe. API takes UChars 1235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) unsafeCPSet(t->unsafeCP, (UChar)uniChar); 1236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // here we want to add the prefix structure. 1240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // I will try to process it as a reverse contraction, if possible. 1241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // prefix buffer is already reversed. 
1242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(element->prefixSize!=0) { 1244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // We keep the seen prefix starter elements in a hashtable 1245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // we need it to be able to distinguish between the simple 1246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // codepoints and prefix starters. Also, we need to use it 1247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // for canonical closure. 1248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCAElements *composed = (UCAElements *)uprv_malloc(sizeof(UCAElements)); 1250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* test for NULL */ 1251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (composed == NULL) { 1252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 1253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 1254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(composed, element, sizeof(UCAElements)); 1256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) composed->cPoints = composed->uchars; 1257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) composed->prefix = composed->prefixChars; 1258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) composed->prefixSize = unorm_normalize(element->prefix, element->prefixSize, UNORM_NFC, 0, composed->prefix, 128, status); 1260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
1262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(t->prefixLookup != NULL) { 1263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCAElements *uCE = (UCAElements *)uhash_get(t->prefixLookup, element); 1264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(uCE != NULL) { // there is already a set of code points here 1265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element->mapCE = uprv_uca_addPrefix(t, uCE->mapCE, element, status); 1266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { // no code points, so this spot is clean 1267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element->mapCE = uprv_uca_addPrefix(t, UCOL_NOT_FOUND, element, status); 1268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uCE = (UCAElements *)uprv_malloc(sizeof(UCAElements)); 1269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* test for NULL */ 1270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (uCE == NULL) { 1271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 1272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 1273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(uCE, element, sizeof(UCAElements)); 1275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uCE->cPoints = uCE->uchars; 1276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uhash_put(t->prefixLookup, uCE, uCE, status); 1277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(composed->prefixSize != element->prefixSize || uprv_memcmp(composed->prefix, element->prefix, element->prefixSize)) { 1279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // do it! 
1280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) composed->mapCE = uprv_uca_addPrefix(t, element->mapCE, composed, status); 1281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(composed); 1284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // We need to use the canonical iterator here 1287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // the way we do it is to generate the canonically equivalent strings 1288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // for the contraction and then add the sequences that pass FCD check 1289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(element->cSize > 1 && !(element->cSize==2 && UTF16_IS_LEAD(element->cPoints[0]) && UTF16_IS_TRAIL(element->cPoints[1]))) { // this is a contraction, we should check whether a composed form should also be included 1290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString source(element->cPoints, element->cSize); 1291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CanonicalIterator it(source, *status); 1292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) source = it.next(); 1293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(!source.isBogus()) { 1294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(Normalizer::quickCheck(source, UNORM_FCD, *status) != UNORM_NO) { 1295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element->cSize = source.extract(element->cPoints, 128, *status); 1296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_finalizeAddition(t, element, status); 1297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) } 1298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) source = it.next(); 1299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CE = element->mapCE; 1301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CE = uprv_uca_finalizeAddition(t, element, status); 1303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return CE; 1306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*void uprv_uca_getMaxExpansionJamo(CompactEIntArray *mapping, */ 1310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void uprv_uca_getMaxExpansionJamo(UNewTrie *mapping, 1311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) MaxExpansionTable *maxexpansion, 1312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) MaxJamoExpansionTable *maxjamoexpansion, 1313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool jamospecial, 1314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *status) 1315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 1316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint32_t VBASE = 0x1161; 1317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint32_t TBASE = 0x11A8; 1318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint32_t VCOUNT = 21; 1319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint32_t TCOUNT = 28; 1320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) 1321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t v = VBASE + VCOUNT - 1; 1322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t t = TBASE + TCOUNT - 1; 1323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t ce; 1324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (v >= VBASE) { 1326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*ce = ucmpe32_get(mapping, v);*/ 1327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ce = utrie_get32(mapping, v, NULL); 1328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (ce < UCOL_SPECIAL_FLAG) { 1329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_setMaxExpansion(ce, 2, maxexpansion, status); 1330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) v --; 1332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (t >= TBASE) 1335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 1336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*ce = ucmpe32_get(mapping, t);*/ 1337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ce = utrie_get32(mapping, t, NULL); 1338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (ce < UCOL_SPECIAL_FLAG) { 1339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_setMaxExpansion(ce, 3, maxexpansion, status); 1340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t --; 1342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* According 
to the docs, 99% of the time, the Jamo will not be special */ 1344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (jamospecial) { 1345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* gets the max expansion in all unicode characters */ 1346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int count = maxjamoexpansion->position; 1347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t maxTSize = (uint8_t)(maxjamoexpansion->maxLSize + 1348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxjamoexpansion->maxVSize + 1349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxjamoexpansion->maxTSize); 1350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t maxVSize = (uint8_t)(maxjamoexpansion->maxLSize + 1351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxjamoexpansion->maxVSize); 1352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (count > 0) { 1354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) count --; 1355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (*(maxjamoexpansion->isV + count) == TRUE) { 1356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_setMaxExpansion( 1357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(maxjamoexpansion->endExpansionCE + count), 1358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxVSize, maxexpansion, status); 1359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 1361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_setMaxExpansion( 1362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *(maxjamoexpansion->endExpansionCE + count), 1363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxTSize, maxexpansion, status); 
1364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CDECL_BEGIN 1370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static inline uint32_t U_CALLCONV 1371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)getFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) 1372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 1373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t value; 1374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t tag; 1375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 limit; 1376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool inBlockZero; 1377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) limit=start+0x400; 1379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(start<limit) { 1380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value=utrie_get32(trie, start, &inBlockZero); 1381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tag = getCETag(value); 1382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(inBlockZero == TRUE) { 1383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start+=UTRIE_DATA_BLOCK_LENGTH; 1384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(!(isSpecial(value) && (tag == IMPLICIT_TAG || tag == NOT_FOUND_TAG))) { 1385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* These are values that are starting in either UCA (IMPLICIT_TAG) or in the 
1386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * tailorings (NOT_FOUND_TAG). Presence of these tags means that there is 1387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * nothing in this position and that it should be skipped. 1388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 1389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UCOL_DEBUG 1390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) static int32_t count = 1; 1391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stdout, "%i, Folded %08X, value %08X\n", count++, start, value); 1392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 1393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return (uint32_t)(UCOL_SPECIAL_FLAG | (SURROGATE_TAG<<24) | offset); 1394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++start; 1396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 1399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CDECL_END 1401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UCOL_DEBUG 1403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// This is a debug function to print the contents of a trie. 
1404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// It is used in conjuction with the code around utrie_unserialize call 1405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool enumRange(const void *context, UChar32 start, UChar32 limit, uint32_t value) { 1406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(start<0x10000) { 1407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stdout, "%08X, %08X, %08X\n", start, limit, value); 1408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stdout, "%08X=%04X %04X, %08X=%04X %04X, %08X\n", start, UTF16_LEAD(start), UTF16_TRAIL(start), limit, UTF16_LEAD(limit), UTF16_TRAIL(limit), value); 1410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return TRUE; 1412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t 1415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)myGetFoldingOffset(uint32_t data) { 1416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(data > UCOL_NOT_FOUND && getCETag(data) == SURROGATE_TAG) { 1417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return (data&0xFFFFFF); 1418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 1420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 1423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CAPI UCATableHeader* U_EXPORT2 
1425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)uprv_uca_assembleTable(tempUCATable *t, UErrorCode *status) { 1426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*CompactEIntArray *mapping = t->mapping;*/ 1427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UNewTrie *mapping = t->mapping; 1428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ExpansionTable *expansions = t->expansions; 1429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CntTable *contractions = t->contractions; 1430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) MaxExpansionTable *maxexpansion = t->maxExpansions; 1431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*status)) { 1433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 1434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t beforeContractions = (uint32_t)((headersize+paddedsize(expansions->position*sizeof(uint32_t)))/sizeof(UChar)); 1437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t contractionsSize = 0; 1439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) contractionsSize = uprv_cnttab_constructTable(contractions, beforeContractions, status); 1440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* the following operation depends on the trie data. 
Therefore, we have to do it before */ 1442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* the trie is compacted */ 1443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* sets jamo expansions */ 1444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_getMaxExpansionJamo(mapping, maxexpansion, t->maxJamoExpansions, 1445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->image->jamoSpecial, status); 1446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*ucmpe32_compact(mapping);*/ 1448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*UMemoryStream *ms = uprv_mstrm_openNew(8192);*/ 1449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*int32_t mappingSize = ucmpe32_flattenMem(mapping, ms);*/ 1450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*const uint8_t *flattened = uprv_mstrm_getBuffer(ms, &mappingSize);*/ 1451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // After setting the jamo expansions, compact the trie and get the needed size 1453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t mappingSize = utrie_serialize(mapping, NULL, 0, getFoldedValue /*getFoldedValue*/, FALSE, status); 1454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t tableOffset = 0; 1456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t *dataStart; 1457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* TODO: LATIN1 array is now in the utrie - it should be removed from the calculation */ 1459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t 
toAllocate =(uint32_t)(headersize+ 1461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) paddedsize(expansions->position*sizeof(uint32_t))+ 1462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) paddedsize(mappingSize)+ 1463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) paddedsize(contractionsSize*(sizeof(UChar)+sizeof(uint32_t)))+ 1464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //paddedsize(0x100*sizeof(uint32_t)) /* Latin1 is now included in the trie */ 1465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* maxexpansion array */ 1466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) + paddedsize(maxexpansion->position * sizeof(uint32_t)) + 1467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* maxexpansion size array */ 1468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) paddedsize(maxexpansion->position * sizeof(uint8_t)) + 1469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) paddedsize(UCOL_UNSAFECP_TABLE_SIZE) + /* Unsafe chars */ 1470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) paddedsize(UCOL_UNSAFECP_TABLE_SIZE)); /* Contraction Ending chars */ 1471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dataStart = (uint8_t *)uprv_malloc(toAllocate); 1474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* test for NULL */ 1475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (dataStart == NULL) { 1476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_MEMORY_ALLOCATION_ERROR; 1477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return NULL; 1478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) UCATableHeader *myData = (UCATableHeader *)dataStart; 1481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Please, do reset all the fields! 1482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memset(dataStart, 0, toAllocate); 1483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Make sure we know this is reset 1484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myData->magic = UCOL_HEADER_MAGIC; 1485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myData->isBigEndian = U_IS_BIG_ENDIAN; 1486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myData->charSetFamily = U_CHARSET_FAMILY; 1487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myData->formatVersion[0] = UCA_FORMAT_VERSION_0; 1488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myData->formatVersion[1] = UCA_FORMAT_VERSION_1; 1489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myData->formatVersion[2] = UCA_FORMAT_VERSION_2; 1490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myData->formatVersion[3] = UCA_FORMAT_VERSION_3; 1491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myData->jamoSpecial = t->image->jamoSpecial; 1492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Don't copy stuff from UCA header! 
1494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //uprv_memcpy(myData, t->image, sizeof(UCATableHeader)); 1495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myData->contractionSize = contractionsSize; 1497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tableOffset += (uint32_t)(paddedsize(sizeof(UCATableHeader))); 1499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myData->options = tableOffset; 1501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(dataStart+tableOffset, t->options, sizeof(UColOptionSet)); 1502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tableOffset += (uint32_t)(paddedsize(sizeof(UColOptionSet))); 1503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* copy expansions */ 1505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*myData->expansion = (uint32_t *)dataStart+tableOffset;*/ 1506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myData->expansion = tableOffset; 1507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(dataStart+tableOffset, expansions->CEs, expansions->position*sizeof(uint32_t)); 1508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tableOffset += (uint32_t)(paddedsize(expansions->position*sizeof(uint32_t))); 1509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* contractions block */ 1511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(contractionsSize != 0) { 1512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* copy contraction index */ 1513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) /*myData->contractionIndex = (UChar *)(dataStart+tableOffset);*/ 1514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myData->contractionIndex = tableOffset; 1515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(dataStart+tableOffset, contractions->codePoints, contractionsSize*sizeof(UChar)); 1516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tableOffset += (uint32_t)(paddedsize(contractionsSize*sizeof(UChar))); 1517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* copy contraction collation elements */ 1519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*myData->contractionCEs = (uint32_t *)(dataStart+tableOffset);*/ 1520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myData->contractionCEs = tableOffset; 1521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(dataStart+tableOffset, contractions->CEs, contractionsSize*sizeof(uint32_t)); 1522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tableOffset += (uint32_t)(paddedsize(contractionsSize*sizeof(uint32_t))); 1523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myData->contractionIndex = 0; 1525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myData->contractionCEs = 0; 1526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* copy mapping table */ 1529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*myData->mappingPosition = dataStart+tableOffset;*/ 1530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*myData->mappingPosition = tableOffset;*/ 1531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
/*uprv_memcpy(dataStart+tableOffset, flattened, mappingSize);*/ 1532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myData->mappingPosition = tableOffset; 1534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie_serialize(mapping, dataStart+tableOffset, toAllocate-tableOffset, getFoldedValue, FALSE, status); 1535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UCOL_DEBUG 1536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // This is debug code to dump the contents of the trie. It needs two functions defined above 1537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 1538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UTrie UCAt = { 0 }; 1539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t trieWord; 1540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie_unserialize(&UCAt, dataStart+tableOffset, 9999999, status); 1541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCAt.getFoldingOffset = myGetFoldingOffset; 1542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_SUCCESS(*status)) { 1543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) utrie_enum(&UCAt, NULL, enumRange, NULL); 1544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) trieWord = UTRIE_GET32_FROM_LEAD(&UCAt, 0xDC01); 1546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 1548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tableOffset += paddedsize(mappingSize); 1549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i = 0; 
1552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* copy max expansion table */ 1554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myData->endExpansionCE = tableOffset; 1555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myData->endExpansionCECount = maxexpansion->position - 1; 1556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* not copying the first element which is a dummy */ 1557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(dataStart + tableOffset, maxexpansion->endExpansionCE + 1, 1558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (maxexpansion->position - 1) * sizeof(uint32_t)); 1559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tableOffset += (uint32_t)(paddedsize((maxexpansion->position)* sizeof(uint32_t))); 1560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myData->expansionCESize = tableOffset; 1561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(dataStart + tableOffset, maxexpansion->expansionCESize + 1, 1562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (maxexpansion->position - 1) * sizeof(uint8_t)); 1563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tableOffset += (uint32_t)(paddedsize((maxexpansion->position)* sizeof(uint8_t))); 1564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Unsafe chars table. Finish it off, then copy it. */ 1566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_unsafeCPAddCCNZ(t, status); 1567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (t->UCA != 0) { /* Or in unsafebits from UCA, making a combined table. 
*/ 1568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i=0; i<UCOL_UNSAFECP_TABLE_SIZE; i++) { 1569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->unsafeCP[i] |= t->UCA->unsafeCP[i]; 1570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myData->unsafeCP = tableOffset; 1573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(dataStart + tableOffset, t->unsafeCP, UCOL_UNSAFECP_TABLE_SIZE); 1574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tableOffset += paddedsize(UCOL_UNSAFECP_TABLE_SIZE); 1575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* Finish building Contraction Ending chars hash table and then copy it out. */ 1578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (t->UCA != 0) { /* Or in unsafebits from UCA, making a combined table. 
*/ 1579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i=0; i<UCOL_UNSAFECP_TABLE_SIZE; i++) { 1580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->contrEndCP[i] |= t->UCA->contrEndCP[i]; 1581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myData->contrEndCP = tableOffset; 1584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(dataStart + tableOffset, t->contrEndCP, UCOL_UNSAFECP_TABLE_SIZE); 1585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tableOffset += paddedsize(UCOL_UNSAFECP_TABLE_SIZE); 1586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(tableOffset != toAllocate) { 1588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UCOL_DEBUG 1589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr, "calculation screwup!!! 
Expected to write %i but wrote %i instead!!!\n", toAllocate, tableOffset); 1590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 1591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *status = U_INTERNAL_PROGRAM_ERROR; 1592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(dataStart); 1593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 1594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) myData->size = tableOffset; 1597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* This should happen upon ressurection */ 1598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*const uint8_t *mapPosition = (uint8_t*)myData+myData->mappingPosition;*/ 1599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /*uprv_mstrm_close(ms);*/ 1600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return myData; 1601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)struct enumStruct { 1605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tempUCATable *t; 1606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCollator *tempColl; 1607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCollationElements* colEl; 1608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const Normalizer2Impl *nfcImpl; 1609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *closed; 1610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t noOfClosures; 1611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *status; 
1612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}; 1613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CDECL_BEGIN 1614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static UBool U_CALLCONV 1615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)_enumCategoryRangeClosureCategory(const void *context, UChar32 start, UChar32 limit, UCharCategory type) { 1616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (type != U_UNASSIGNED && type != U_PRIVATE_USE_CHAR) { // if the range is assigned - we might ommit more categories later 1618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *status = ((enumStruct *)context)->status; 1619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tempUCATable *t = ((enumStruct *)context)->t; 1620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCollator *tempColl = ((enumStruct *)context)->tempColl; 1621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCollationElements* colEl = ((enumStruct *)context)->colEl; 1622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCAElements el; 1623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar decompBuffer[4]; 1624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *decomp; 1625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t noOfDec = 0; 1626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 u32 = 0; 1628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar comp[2]; 1629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t len = 0; 1630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(u32 = start; u32 < limit; u32++) { 
1632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) decomp = ((enumStruct *)context)->nfcImpl-> 1633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) getDecomposition(u32, decompBuffer, noOfDec); 1634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //if((noOfDec = unorm_normalize(comp, len, UNORM_NFD, 0, decomp, 256, status)) > 1 1635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) //|| (noOfDec == 1 && *decomp != (UChar)u32)) 1636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(decomp != NULL) 1637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) { 1638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) len = 0; 1639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U16_APPEND_UNSAFE(comp, len, u32); 1640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(ucol_strcoll(tempColl, comp, len, decomp, noOfDec) != UCOL_EQUAL) { 1641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#ifdef UCOL_DEBUG 1642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr, "Closure: U+%04X -> ", u32); 1643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c; 1644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i = 0; 1645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(i < noOfDec) { 1646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U16_NEXT(decomp, i, noOfDec, c); 1647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr, "%04X ", c); 1648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr, "\n"); 1650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // print CEs for code point vs. 
decomposition 1651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr, "U+%04X CEs: ", u32); 1652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCollationElements *iter = ucol_openElements(tempColl, comp, len, status); 1653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t ce; 1654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while((ce = ucol_next(iter, status)) != UCOL_NULLORDER) { 1655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr, "%08X ", ce); 1656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr, "\nDecomp CEs: "); 1658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucol_setText(iter, decomp, noOfDec, status); 1659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while((ce = ucol_next(iter, status)) != UCOL_NULLORDER) { 1660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr, "%08X ", ce); 1661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fprintf(stderr, "\n"); 1663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucol_closeElements(iter); 1664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 1665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(((enumStruct *)context)->closed != NULL) { 1666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ((enumStruct *)context)->closed->add(u32); 1667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ((enumStruct *)context)->noOfClosures++; 1669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el.cPoints = (UChar *)decomp; 1670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el.cSize = noOfDec; 
1671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el.noOfCEs = 0; 1672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el.prefix = el.prefixChars; 1673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el.prefixSize = 0; 1674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCAElements *prefix=(UCAElements *)uhash_get(t->prefixLookup, &el); 1676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el.cPoints = comp; 1677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el.cSize = len; 1678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el.prefix = el.prefixChars; 1679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el.prefixSize = 0; 1680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(prefix == NULL) { 1681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el.noOfCEs = 0; 1682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucol_setText(colEl, decomp, noOfDec, status); 1683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while((el.CEs[el.noOfCEs] = ucol_next(colEl, status)) != (uint32_t)UCOL_NULLORDER) { 1684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el.noOfCEs++; 1685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el.noOfCEs = 1; 1688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el.CEs[0] = prefix->mapCE; 1689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // This character uses a prefix. We have to add it 1690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // to the unsafe table, as it decomposed form is already 1691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // in. 
In Japanese, this happens for \u309e & \u30fe 1692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Since unsafeCPSet is static in ucol_elm, we are going 1693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // to wrap it up in the uprv_uca_unsafeCPAddCCNZ function 1694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_addAnElement(t, &el, status); 1696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return TRUE; 1701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CDECL_END 1703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 1705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)uprv_uca_setMapCE(tempUCATable *t, UCAElements *element, UErrorCode *status) { 1706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t expansion = 0; 1707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t j; 1708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ExpansionTable *expansions = t->expansions; 1710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(element->noOfCEs == 2 // a two CE expansion 1711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) && isContinuation(element->CEs[1]) // which is a continuation 1712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) && (element->CEs[1] & (~(0xFF << 24 | UCOL_CONTINUATION_MARKER))) == 0 // that has only primaries in 
continuation, 1713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) && (((element->CEs[0]>>8) & 0xFF) == UCOL_BYTE_COMMON) // a common secondary 1714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) && ((element->CEs[0] & 0xFF) == UCOL_BYTE_COMMON) // and a common tertiary 1715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ) { 1716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element->mapCE = UCOL_SPECIAL_FLAG | (LONG_PRIMARY_TAG<<24) // a long primary special 1717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) | ((element->CEs[0]>>8) & 0xFFFF00) // first and second byte of primary 1718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) | ((element->CEs[1]>>24) & 0xFF); // third byte of primary 1719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (EXPANSION_TAG<<UCOL_TAG_SHIFT) 1721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) | (((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4) 1722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) & 0xFFFFF0)); 1723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(j = 1; j<(int32_t)element->noOfCEs; j++) { 1725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_addExpansion(expansions, element->CEs[j], status); 1726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(element->noOfCEs <= 0xF) { 1728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) expansion |= element->noOfCEs; 1729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_addExpansion(expansions, 0, status); 
1731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element->mapCE = expansion; 1733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_setMaxExpansion(element->CEs[element->noOfCEs - 1], 1734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (uint8_t)element->noOfCEs, 1735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->maxExpansions, 1736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status); 1737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 1741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)uprv_uca_addFCD4AccentedContractions(tempUCATable *t, 1742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCollationElements* colEl, 1743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar *data, 1744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t len, 1745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCAElements *el, 1746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *status) { 1747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar decomp[256], comp[256]; 1748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t decLen, compLen; 1749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) decLen = unorm_normalize(data, len, UNORM_NFD, 0, decomp, 256, status); 1751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) compLen = unorm_normalize(data, len, UNORM_NFC, 0, comp, 256, status); 1752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) decomp[decLen] = comp[compLen] = 0; 
1753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el->cPoints = decomp; 1755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el->cSize = decLen; 1756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el->noOfCEs = 0; 1757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el->prefixSize = 0; 1758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el->prefix = el->prefixChars; 1759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCAElements *prefix=(UCAElements *)uhash_get(t->prefixLookup, el); 1761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el->cPoints = comp; 1762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el->cSize = compLen; 1763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el->prefix = el->prefixChars; 1764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el->prefixSize = 0; 1765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(prefix == NULL) { 1766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el->noOfCEs = 0; 1767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucol_setText(colEl, decomp, decLen, status); 1768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while((el->CEs[el->noOfCEs] = ucol_next(colEl, status)) != (uint32_t)UCOL_NULLORDER) { 1769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el->noOfCEs++; 1770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_setMapCE(t, el, status); 1772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_addAnElement(t, el, status); 1773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 
1775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 1777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)uprv_uca_addMultiCMContractions(tempUCATable *t, 1778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCollationElements* colEl, 1779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tempTailorContext *c, 1780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCAElements *el, 1781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *status) { 1782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CombinClassTable *cmLookup = t->cmLookup; 1783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar newDecomp[256]; 1784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t maxComp, newDecLen; 1785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 fcdHighStart; 1786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint16_t *fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status); 1787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(*status)) { 1788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int16_t curClass = (unorm_getFCD16(fcdTrieIndex, c->tailoringCM) & 0xff); 1791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CompData *precomp = c->precomp; 1792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t compLen = c->compLen; 1793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar *comp = c->comp; 1794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxComp = c->precompLen; 1795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) for (int32_t j=0; j < maxComp; j++) { 1797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t count=0; 1798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) do { 1799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ( count == 0 ) { // Decompose the saved precomposed char. 1800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar temp[2]; 1801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) temp[0]=precomp[j].cp; 1802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) temp[1]=0; 1803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newDecLen = unorm_normalize(temp, 1, UNORM_NFD, 0, 1804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newDecomp, sizeof(newDecomp)/sizeof(UChar), status); 1805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newDecomp[newDecLen++] = cmLookup->cPoints[c->cmPos]; 1806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { // swap 2 combining marks when they are equal. 
1808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(newDecomp, c->decomp, sizeof(UChar)*(c->decompLen)); 1809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newDecLen = c->decompLen; 1810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newDecomp[newDecLen++] = precomp[j].cClass; 1811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) newDecomp[newDecLen] = 0; 1813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) compLen = unorm_normalize(newDecomp, newDecLen, UNORM_NFC, 0, 1814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) comp, 256, status); 1815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (compLen==1) { 1816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) comp[compLen++] = newDecomp[newDecLen++] = c->tailoringCM; 1817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) comp[compLen] = newDecomp[newDecLen] = 0; 1818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el->cPoints = newDecomp; 1819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el->cSize = newDecLen; 1820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCAElements *prefix=(UCAElements *)uhash_get(t->prefixLookup, el); 1822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el->cPoints = c->comp; 1823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el->cSize = compLen; 1824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el->prefix = el->prefixChars; 1825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el->prefixSize = 0; 1826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(prefix == NULL) { 1827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el->noOfCEs = 0; 1828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
ucol_setText(colEl, newDecomp, newDecLen, status); 1829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while((el->CEs[el->noOfCEs] = ucol_next(colEl, status)) != (uint32_t)UCOL_NULLORDER) { 1830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el->noOfCEs++; 1831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_setMapCE(t, el, status); 1833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_finalizeAddition(t, el, status); 1834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Save the current precomposed char and its class to find any 1836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // other combining mark combinations. 1837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) precomp[c->precompLen].cp=comp[0]; 1838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) precomp[c->precompLen].cClass = curClass; 1839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c->precompLen++; 1840f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1841f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1842f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } while (++count<2 && (precomp[j].cClass == curClass)); 1843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 1848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)uprv_uca_addTailCanonicalClosures(tempUCATable *t, 1849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCollationElements* colEl, 
1850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar baseCh, 1851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar cMark, 1852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCAElements *el, 1853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *status) { 1854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CombinClassTable *cmLookup = t->cmLookup; 1855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 fcdHighStart; 1856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint16_t *fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status); 1857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(*status)) { 1858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int16_t maxIndex = (unorm_getFCD16(fcdTrieIndex, cMark) & 0xff ); 1861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCAElements element; 1862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint16_t *index; 1863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar decomp[256]; 1864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar comp[256]; 1865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CompData precomp[256]; // precomposed array 1866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t precompLen = 0; // count for precomp 1867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i, len, decompLen, curClass, replacedPos; 1868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tempTailorContext c; 1869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ( cmLookup == NULL ) { 
1871f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1872f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1873f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) index = cmLookup->index; 1874f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t cClass=(unorm_getFCD16(fcdTrieIndex, cMark) & 0xff); 1875f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxIndex = (int32_t)index[(unorm_getFCD16(fcdTrieIndex, cMark) & 0xff)-1]; 1876f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c.comp = comp; 1877f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c.decomp = decomp; 1878f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c.precomp = precomp; 1879f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c.tailoringCM = cMark; 1880f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1881f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (cClass>0) { 1882f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxIndex = (int32_t)index[cClass-1]; 1883f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1884f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 1885f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) maxIndex=0; 1886f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1887f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) decomp[0]=baseCh; 1888f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for ( i=0; i<maxIndex ; i++ ) { 1889f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) decomp[1] = cmLookup->cPoints[i]; 1890f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) decomp[2]=0; 1891f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) decompLen=2; 1892f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) len = unorm_normalize(decomp, decompLen, UNORM_NFC, 0, comp, 256, status); 
1893f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (len==1) { 1894f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Save the current precomposed char and its class to find any 1895f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // other combining mark combinations. 1896f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) precomp[precompLen].cp=comp[0]; 1897f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) curClass = precomp[precompLen].cClass = 1898f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) index[unorm_getFCD16(fcdTrieIndex, decomp[1]) & 0xff]; 1899f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) precompLen++; 1900f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) replacedPos=0; 1901f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (decompLen=0; decompLen< (int32_t)el->cSize; decompLen++) { 1902f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) decomp[decompLen] = el->cPoints[decompLen]; 1903f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (decomp[decompLen]==cMark) { 1904f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) replacedPos = decompLen; // record the position for later use 1905f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1906f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1907f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ( replacedPos != 0 ) { 1908f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) decomp[replacedPos]=cmLookup->cPoints[i]; 1909f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1910f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) decomp[decompLen] = 0; 1911f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) len = unorm_normalize(decomp, decompLen, UNORM_NFC, 0, comp, 256, status); 1912f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) comp[len++] = decomp[decompLen++] 
= cMark; 1913f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) comp[len] = decomp[decompLen] = 0; 1914f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element.cPoints = decomp; 1915f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element.cSize = decompLen; 1916f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element.noOfCEs = 0; 1917f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element.prefix = el->prefixChars; 1918f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element.prefixSize = 0; 1919f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1920f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCAElements *prefix=(UCAElements *)uhash_get(t->prefixLookup, &element); 1921f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element.cPoints = comp; 1922f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element.cSize = len; 1923f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element.prefix = el->prefixChars; 1924f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element.prefixSize = 0; 1925f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(prefix == NULL) { 1926f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element.noOfCEs = 0; 1927f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucol_setText(colEl, decomp, decompLen, status); 1928f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while((element.CEs[element.noOfCEs] = ucol_next(colEl, status)) != (uint32_t)UCOL_NULLORDER) { 1929f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) element.noOfCEs++; 1930f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1931f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_setMapCE(t, &element, status); 1932f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_finalizeAddition(t, &element, status); 
1933f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1934f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1935f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // This is a fix for tailoring contractions with accented 1936f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // character at the end of contraction string. 1937f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ((len>2) && 1938f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (unorm_getFCD16(fcdTrieIndex, comp[len-2]) & 0xff00)==0) { 1939f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_addFCD4AccentedContractions(t, colEl, comp, len, &element, status); 1940f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1941f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1942f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (precompLen >1) { 1943f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c.compLen = len; 1944f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c.decompLen = decompLen; 1945f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c.precompLen = precompLen; 1946f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c.cmPos = i; 1947f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_addMultiCMContractions(t, colEl, &c, &element, status); 1948f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) precompLen = c.precompLen; 1949f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1950f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1951f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1952f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1953f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1954f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC int32_t U_EXPORT2 1955f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles)uprv_uca_canonicalClosure(tempUCATable *t, 1956f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UColTokenParser *src, 1957f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeSet *closed, 1958f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *status) 1959f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 1960f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) enumStruct context; 1961f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) context.closed = closed; 1962f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) context.noOfClosures = 0; 1963f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCAElements el; 1964f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UColToken *tok; 1965f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t i = 0, j = 0; 1966f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar baseChar, firstCM; 1967f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 fcdHighStart; 1968f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint16_t *fcdTrieIndex = unorm_getFCDTrieIndex(fcdHighStart, status); 1969f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) context.nfcImpl=Normalizer2Factory::getNFCImpl(*status); 1970f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(*status)) { 1971f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 1972f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1973f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1974f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCollator *tempColl = NULL; 1975f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tempUCATable *tempTable = uprv_uca_cloneTempTable(t, status); 1976f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Check for null pointer 
1977f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(*status)) { 1978f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 1979f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1980f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1981f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCATableHeader *tempData = uprv_uca_assembleTable(tempTable, status); 1982f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tempColl = ucol_initCollator(tempData, 0, t->UCA, status); 1983f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ( tempTable->cmLookup != NULL ) { 1984f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) t->cmLookup = tempTable->cmLookup; // copy over to t 1985f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tempTable->cmLookup = NULL; 1986f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1987f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_closeTempTable(tempTable); 1988f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1989f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_SUCCESS(*status)) { 1990f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tempColl->ucaRules = NULL; 1991f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tempColl->actualLocale = NULL; 1992f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tempColl->validLocale = NULL; 1993f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tempColl->requestedLocale = NULL; 1994f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tempColl->hasRealData = TRUE; 1995f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tempColl->freeImageOnClose = TRUE; 1996f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(tempData != 0) { 1997f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_free(tempData); 
1998f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1999f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2000f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* produce canonical closure */ 2001f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCollationElements* colEl = ucol_openElements(tempColl, NULL, 0, status); 2002f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Check for null pointer 2003f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(*status)) { 2004f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 2005f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2006f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) context.t = t; 2007f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) context.tempColl = tempColl; 2008f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) context.colEl = colEl; 2009f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) context.status = status; 2010f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) u_enumCharTypes(_enumCategoryRangeClosureCategory, &context); 2011f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2012f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ( (src==NULL) || !src->buildCCTabFlag ) { 2013f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucol_closeElements(colEl); 2014f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucol_close(tempColl); 2015f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return context.noOfClosures; // no extra contraction needed to add 2016f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2017f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2018f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for (i=0; i < src->resultLen; i++) { 2019f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) baseChar = firstCM= 
(UChar)0; 2020f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tok = src->lh[i].first; 2021f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while (tok != NULL && U_SUCCESS(*status)) { 2022f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el.prefix = el.prefixChars; 2023f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el.cPoints = el.uchars; 2024f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(tok->prefix != 0) { 2025f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el.prefixSize = tok->prefix>>24; 2026f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(el.prefix, src->source + (tok->prefix & 0x00FFFFFF), el.prefixSize*sizeof(UChar)); 2027f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2028f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el.cSize = (tok->source >> 24)-(tok->prefix>>24); 2029f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(el.uchars, (tok->source & 0x00FFFFFF)+(tok->prefix>>24) + src->source, el.cSize*sizeof(UChar)); 2030f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 2031f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el.prefixSize = 0; 2032f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *el.prefix = 0; 2033f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2034f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) el.cSize = (tok->source >> 24); 2035f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(el.uchars, (tok->source & 0x00FFFFFF) + src->source, el.cSize*sizeof(UChar)); 2036f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2037f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(src->UCA != NULL) { 2038f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(j = 0; j<el.cSize; j++) { 2039f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int16_t fcd = 
unorm_getFCD16(fcdTrieIndex, el.cPoints[j]); 2040f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ( (fcd & 0xff) == 0 ) { 2041f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) baseChar = el.cPoints[j]; // last base character 2042f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) firstCM=0; // reset combining mark value 2043f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2044f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) else { 2045f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ( (baseChar!=0) && (firstCM==0) ) { 2046f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) firstCM = el.cPoints[j]; // first combining mark 2047f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2048f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2049f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2050f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2051f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if ( (baseChar!= (UChar)0) && (firstCM != (UChar)0) ) { 2052f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // find all the canonical rules 2053f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_uca_addTailCanonicalClosures(t, colEl, baseChar, firstCM, &el, status); 2054f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2055f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) tok = tok->next; 2056f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2057f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 2058f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucol_closeElements(colEl); 2059f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucol_close(tempColl); 2060f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2061f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return context.noOfClosures; 
2062f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 2063f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 2064f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif /* #if !UCONFIG_NO_COLLATION */ 2065