1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Copyright (C) 2003-2007, International Business Machines 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* file name: gencnvex.c 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* encoding: US-ASCII 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* tab size: 8 (not used) 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* indentation:4 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created on: 2003oct12 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created by: Markus W. 
Scherer 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdio.h> 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ustring.h" 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cstring.h" 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h" 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_cnv.h" 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnvmbcs.h" 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "toolutil.h" 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unewdata.h" 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucm.h" 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "makeconv.h" 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "genmbcs.h" 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCnvExtClose(NewConverter *cnvData); 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCnvExtIsValid(NewConverter *cnvData, 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *bytes, int32_t length); 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCnvExtAddTable(NewConverter *cnvData, UCMTable *table, UConverterStaticData *staticData); 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic uint32_t 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCnvExtWrite(NewConverter *cnvData, const UConverterStaticData *staticData, 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UNewDataMemory *pData, int32_t tableType); 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef struct CnvExtData { 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NewConverter newConverter; 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCMFile *ucm; 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* toUnicode (state table in ucm->states) */ 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UToolMemory *toUTable, *toUUChars; 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fromUnicode */ 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UToolMemory *fromUTableUChars, *fromUTableValues, *fromUBytes; 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint16_t stage1[MBCS_STAGE_1_SIZE]; 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint16_t stage2[MBCS_STAGE_2_SIZE]; 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint16_t stage3[0x10000<<UCNV_EXT_STAGE_2_LEFT_SHIFT]; /* 0x10000 because of 16-bit stage 2/3 indexes */ 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t 
stage3b[0x10000]; 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t stage1Top, stage2Top, stage3Top, stage3bTop; 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* for stage3 compaction of <subchar1> |2 mappings */ 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint16_t stage3Sub1Block; 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* statistics */ 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru maxInBytes, maxOutBytes, maxBytesPerUChar, 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru maxInUChars, maxOutUChars, maxUCharsPerByte; 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} CnvExtData; 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruNewConverter * 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCnvExtOpen(UCMFile *ucm) { 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CnvExtData *extData; 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData=(CnvExtData *)uprv_malloc(sizeof(CnvExtData)); 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(extData==NULL) { 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("out of memory\n"); 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_MEMORY_ALLOCATION_ERROR); 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memset(extData, 0, sizeof(CnvExtData)); 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
extData->ucm=ucm; /* aliased, not owned */ 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->newConverter.close=CnvExtClose; 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->newConverter.isValid=CnvExtIsValid; 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->newConverter.addTable=CnvExtAddTable; 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->newConverter.write=CnvExtWrite; 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return &extData->newConverter; 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCnvExtClose(NewConverter *cnvData) { 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CnvExtData *extData=(CnvExtData *)cnvData; 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(extData!=NULL) { 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utm_close(extData->toUTable); 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utm_close(extData->toUUChars); 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utm_close(extData->fromUTableUChars); 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utm_close(extData->fromUTableValues); 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utm_close(extData->fromUBytes); 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* we do not expect this to be called */ 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
QueruCnvExtIsValid(NewConverter *cnvData, 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *bytes, int32_t length) { 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic uint32_t 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCnvExtWrite(NewConverter *cnvData, const UConverterStaticData *staticData, 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UNewDataMemory *pData, int32_t tableType) { 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CnvExtData *extData=(CnvExtData *)cnvData; 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length, top, headerSize; 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t indexes[UCNV_EXT_INDEXES_MIN_LENGTH]={ 0 }; 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(tableType&TABLE_BASE) { 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru headerSize=0; 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _MBCSHeader header={ { 0, 0, 0, 0 }, 0, 0, 0, 0, 0, 0, 0 }; 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the header and base table name for an extension-only table */ 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=(int32_t)uprv_strlen(extData->ucm->baseName)+1; 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(length&3) { 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* add padding */ 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
extData->ucm->baseName[length++]=0; 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 133c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru headerSize=MBCS_HEADER_V4_LENGTH*4+length; 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fill the header */ 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru header.version[0]=4; 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru header.version[1]=2; 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru header.flags=(uint32_t)((headerSize<<8)|MBCS_OUTPUT_EXT_ONLY); 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the header and the base table name */ 141c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru udata_writeBlock(pData, &header, MBCS_HEADER_V4_LENGTH*4); 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, extData->ucm->baseName, length); 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fill indexes[] - offsets/indexes are in units of the target array */ 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru top=0; 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_INDEXES_LENGTH]=length=UCNV_EXT_INDEXES_MIN_LENGTH; 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru top+=length*4; 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_TO_U_INDEX]=top; 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
indexes[UCNV_EXT_TO_U_LENGTH]=length=utm_countItems(extData->toUTable); 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru top+=length*4; 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_TO_U_UCHARS_INDEX]=top; 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_TO_U_UCHARS_LENGTH]=length=utm_countItems(extData->toUUChars); 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru top+=length*2; 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FROM_U_UCHARS_INDEX]=top; 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=utm_countItems(extData->fromUTableUChars); 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru top+=length*2; 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(top&3) { 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* add padding */ 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *((UChar *)utm_alloc(extData->fromUTableUChars))=0; 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *((uint32_t *)utm_alloc(extData->fromUTableValues))=0; 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++length; 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru top+=2; 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FROM_U_LENGTH]=length; 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FROM_U_VALUES_INDEX]=top; 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru top+=length*4; 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FROM_U_BYTES_INDEX]=top; 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=utm_countItems(extData->fromUBytes); 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru top+=length; 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(top&1) { 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* add padding */ 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *((uint8_t *)utm_alloc(extData->fromUBytes))=0; 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++length; 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++top; 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FROM_U_BYTES_LENGTH]=length; 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]=top; 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FROM_U_STAGE_1_LENGTH]=length=extData->stage1Top; 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]=length+=extData->stage2Top; 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru top+=length*2; 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]=top; 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=extData->stage3Top; 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru top+=length*2; 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(top&3) { 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* add padding */ 
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage3[extData->stage3Top++]=0; 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++length; 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru top+=2; 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]=length; 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]=top; 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]=length=extData->stage3bTop; 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru top+=length*4; 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_SIZE]=top; 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* statistics */ 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_COUNT_BYTES]= 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (extData->maxInBytes<<16)| 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (extData->maxOutBytes<<8)| 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->maxBytesPerUChar; 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_COUNT_UCHARS]= 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (extData->maxInUChars<<16)| 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (extData->maxOutUChars<<8)| 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->maxUCharsPerByte; 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FLAGS]=extData->ucm->ext->unicodeMask; 
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the extension data */ 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, indexes, sizeof(indexes)); 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, utm_getStart(extData->toUTable), indexes[UCNV_EXT_TO_U_LENGTH]*4); 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, utm_getStart(extData->toUUChars), indexes[UCNV_EXT_TO_U_UCHARS_LENGTH]*2); 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, utm_getStart(extData->fromUTableUChars), indexes[UCNV_EXT_FROM_U_LENGTH]*2); 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, utm_getStart(extData->fromUTableValues), indexes[UCNV_EXT_FROM_U_LENGTH]*4); 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, utm_getStart(extData->fromUBytes), indexes[UCNV_EXT_FROM_U_BYTES_LENGTH]); 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, extData->stage1, extData->stage1Top*2); 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, extData->stage2, extData->stage2Top*2); 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, extData->stage3, extData->stage3Top*2); 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, extData->stage3b, extData->stage3bTop*4); 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if 0 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, j; 
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=extData->stage1Top; 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("\nstage1[%x]:\n", length); 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<length; ++i) { 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(extData->stage1[i]!=length) { 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("stage1[%04x]=%04x\n", i, extData->stage1[i]); 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru j=length; 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=extData->stage2Top; 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("\nstage2[%x]:\n", length); 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<length; ++j, ++i) { 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(extData->stage2[i]!=0) { 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("stage12[%04x]=%04x\n", j, extData->stage2[i]); 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=extData->stage3Top; 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("\nstage3[%x]:\n", length); 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<length; ++i) { 
263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(extData->stage3[i]!=0) { 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("stage3[%04x]=%04x\n", i, extData->stage3[i]); 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=extData->stage3bTop; 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("\nstage3b[%x]:\n", length); 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<length; ++i) { 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(extData->stage3b[i]!=0) { 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("stage3b[%04x]=%08x\n", i, extData->stage3b[i]); 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(VERBOSE) { 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("size of extension data: %ld\n", (long)top); 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* return the number of bytes that should have been written */ 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (uint32_t)(headerSize+top); 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* to 
Unicode --------------------------------------------------------------- */ 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Remove fromUnicode fallbacks and SUB mappings which are irrelevant for 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the toUnicode table. 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This includes mappings with MBCS_FROM_U_EXT_FLAG which were suitable 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * for the base toUnicode table but not for the base fromUnicode table. 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The table must be sorted. 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Modifies previous data in the reverseMap. 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerureduceToUMappings(UCMTable *table) { 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCMapping *mappings; 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *map; 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, j, count; 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t flag; 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mappings=table->mappings; 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru map=table->reverseMap; 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=table->mappingsLength; 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* leave the map alone for the initial mappings with desired flags */ 
309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=j=0; i<count; ++i) { 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru flag=mappings[map[i]].f; 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(flag!=0 && flag!=3) { 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reduce from here to the rest */ 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(j=i; i<count; ++i) { 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru flag=mappings[map[i]].f; 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(flag==0 || flag==3) { 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru map[j++]=map[i]; 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return j; 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic uint32_t 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetToUnicodeValue(CnvExtData *extData, UCMTable *table, UCMapping *m) { 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 *u32; 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *u; 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t value; 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t u16Length, ratio; 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode errorCode; 
334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the Unicode result code point or string index */ 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(m->uLen==1) { 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u16Length=U16_LENGTH(m->u); 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=(uint32_t)(UCNV_EXT_TO_U_MIN_CODE_POINT+m->u); 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the parser enforces m->uLen<=UCNV_EXT_MAX_UCHARS */ 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get the result code point string and its 16-bit string length */ 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u32=UCM_GET_CODE_POINTS(table, m); 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_strFromUTF32(NULL, 0, &u16Length, u32, m->uLen, &errorCode); 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) { 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(errorCode); 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* allocate it and put its length and index into the value */ 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value= 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (((uint32_t)m->uLen+UCNV_EXT_TO_U_LENGTH_OFFSET)<<UCNV_EXT_TO_U_LENGTH_SHIFT)| 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((uint32_t)utm_countItems(extData->toUUChars)); 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
u=utm_allocN(extData->toUUChars, u16Length); 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the result 16-bit string */ 357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_strFromUTF32(u, u16Length, NULL, u32, m->uLen, &errorCode); 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) { 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(errorCode); 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(m->f==0) { 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value|=UCNV_EXT_TO_U_ROUNDTRIP_FLAG; 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* update statistics */ 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(m->bLen>extData->maxInBytes) { 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->maxInBytes=m->bLen; 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(u16Length>extData->maxOutUChars) { 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->maxOutUChars=u16Length; 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ratio=(u16Length+(m->bLen-1))/m->bLen; 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(ratio>extData->maxUCharsPerByte) { 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
extData->maxUCharsPerByte=ratio; 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return value; 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Recursive toUTable generator core function. 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Preconditions: 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - start<limit (There is at least one mapping.) 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - The mappings are sorted lexically. (Access is through the reverseMap.) 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - All mappings between start and limit have input sequences that share 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the same prefix of unitIndex length, and therefore all of these sequences 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * are at least unitIndex+1 long. 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - There are only relevant mappings available through the reverseMap, 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * see reduceToUMappings(). 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * One function invocation generates one section table. 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Steps: 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1. Count the number of unique unit values and get the low/high unit values 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * that occur at unitIndex. 
399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 2. Allocate the section table with possible optimization for linear access. 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 3. Write temporary version of the section table with start indexes of 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * subsections, each corresponding to one unit value at unitIndex. 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 4. Iterate through the table once more, and depending on the subsection length: 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 0: write 0 as a result value (unused byte in linear-access section table) 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * >0: if there is one mapping with an input unit sequence of unitIndex+1 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * then defaultValue=compute the mapping result for this whole sequence 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * else defaultValue=0 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * recurse into the subsection 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugenerateToUTable(CnvExtData *extData, UCMTable *table, 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t start, int32_t limit, int32_t unitIndex, 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t defaultValue) { 414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCMapping *mappings, *m; 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *map; 416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, j, uniqueCount, count, subStart, subLimit; 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *bytes; 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t low, high, prev; 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t *section; 422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mappings=table->mappings; 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru map=table->reverseMap; 425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* step 1: examine the input units; set low, high, uniqueCount */ 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=mappings+map[start]; 428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bytes=UCM_GET_BYTES(table, m); 429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru low=bytes[unitIndex]; 430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uniqueCount=1; 431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=high=low; 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=start+1; i<limit; ++i) { 434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=mappings+map[i]; 435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bytes=UCM_GET_BYTES(table, m); 436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru high=bytes[unitIndex]; 437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(high!=prev) { 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=high; 440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++uniqueCount; 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* step 2: allocate the section; set count, section */ 445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=(high-low)+1; 446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count<0x100 && (unitIndex==0 || uniqueCount>=(3*count)/4)) { 447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * for the root table and for fairly full tables: 449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * allocate for direct, linear array access 450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * by keeping count, to write an entry for each unit value 451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * from low to high 452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * exception: use a compact table if count==0x100 because 453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * that cannot be encoded in the length byte 454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=uniqueCount; 457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count>=0x100) { 460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "error: toUnicode extension table section overflow: %ld section entries\n", (long)count); 461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* allocate the section: 1 entry for the header + 
count for the items */ 465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru section=(uint32_t *)utm_allocN(extData->toUTable, 1+count); 466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the section header */ 468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *section++=((uint32_t)count<<UCNV_EXT_TO_U_BYTE_SHIFT)|defaultValue; 469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* step 3: write temporary section table with subsection starts */ 471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=low-1; /* just before low to prevent empty subsections before low */ 472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru j=0; /* section table index */ 473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=start; i<limit; ++i) { 474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=mappings+map[i]; 475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bytes=UCM_GET_BYTES(table, m); 476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru high=bytes[unitIndex]; 477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(high!=prev) { 479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* start of a new subsection for unit high */ 480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count>uniqueCount) { 481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write empty subsections for unused units in a linear table */ 482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(++prev<high) { 483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru section[j++]=((uint32_t)prev<<UCNV_EXT_TO_U_BYTE_SHIFT)|(uint32_t)i; 484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru } else { 486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=high; 487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the entry with the subsection start */ 490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru section[j++]=((uint32_t)high<<UCNV_EXT_TO_U_BYTE_SHIFT)|(uint32_t)i; 491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* assert(j==count) */ 494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* step 4: recurse and write results */ 496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subLimit=UCNV_EXT_TO_U_GET_VALUE(section[0]); 497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(j=0; j<count; ++j) { 498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subStart=subLimit; 499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subLimit= (j+1)<count ? 
UCNV_EXT_TO_U_GET_VALUE(section[j+1]) : limit; 500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* remove the subStart temporary value */ 502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru section[j]&=~UCNV_EXT_TO_U_VALUE_MASK; 503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(subStart==subLimit) { 505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* leave the value zero: empty subsection for unused unit in a linear table */ 506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* see if there is exactly one input unit sequence of length unitIndex+1 */ 510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru defaultValue=0; 511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=mappings+map[subStart]; 512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(m->bLen==unitIndex+1) { 513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* do not include this in generateToUTable() */ 514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++subStart; 515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(subStart<subLimit && mappings[map[subStart]].bLen==unitIndex+1) { 517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* print error for multiple same-input-sequence mappings */ 518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "error: multiple mappings from same bytes\n"); 519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucm_printMapping(table, m, stderr); 520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
ucm_printMapping(table, mappings+map[subStart], stderr); 521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru defaultValue=getToUnicodeValue(extData, table, m); 525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(subStart==subLimit) { 528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the result for the input sequence ending here */ 529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru section[j]|=defaultValue; 530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the index to the subsection table */ 532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru section[j]|=(uint32_t)utm_countItems(extData->toUTable); 533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* recurse */ 535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!generateToUTable(extData, table, subStart, subLimit, unitIndex+1, defaultValue)) { 536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Generate the toUTable and 
toUUChars from the input table. 545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The input table must be sorted, and all precision flags must be 0..3. 546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This function will modify the table's reverseMap. 547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool 549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerumakeToUTable(CnvExtData *extData, UCMTable *table) { 550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t toUCount; 551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toUCount=reduceToUMappings(table); 553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->toUTable=utm_open("cnv extension toUTable", 0x10000, UCNV_EXT_TO_U_MIN_CODE_POINT, 4); 555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->toUUChars=utm_open("cnv extension toUUChars", 0x10000, UCNV_EXT_TO_U_INDEX_MASK+1, 2); 556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return generateToUTable(extData, table, 0, toUCount, 0, 0); 558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* from Unicode ------------------------------------------------------------- */ 561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * preprocessing: 564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * rebuild reverseMap with mapping indexes for mappings relevant for from Unicode 
*   change each Unicode string to encode all but the first code point in 16-bit form
 *
 * generation:
 *   for each unique code point
 *     write an entry in the 3-stage trie
 *     check that there is only one single-code point sequence
 *     start recursion for following 16-bit input units
 */

/*
 * Remove toUnicode fallbacks and non-<subchar1> SUB mappings
 * which are irrelevant for the fromUnicode extension table.
 * Remove MBCS_FROM_U_EXT_FLAG bits.
 * Overwrite the reverseMap with an index array to the relevant mappings.
 * Modify the code point sequences to a generator-friendly format where
 * the first code point remains unchanged but the following ones are recoded
 * into 16-bit Unicode string form.
 * The table must be sorted.
 * Destroys previous data in the reverseMap.
584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t 586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruprepareFromUMappings(UCMTable *table) { 587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCMapping *mappings, *m; 588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *map; 589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, j, count; 590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t flag; 591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mappings=table->mappings; 593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru map=table->reverseMap; 594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=table->mappingsLength; 595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * we do not go through the map on input because the mappings are 598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * sorted lexically 599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=mappings; 601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=j=0; i<count; ++m, ++i) { 603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru flag=m->f; 604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(flag>=0) { 605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru flag&=MBCS_FROM_U_EXT_MASK; 606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m->f=flag; 607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(flag==0 || flag==1 || (flag==2 && 
m->bLen==1)) { 609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru map[j++]=i; 610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(m->uLen>1) { 612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* recode all but the first code point to 16-bit Unicode */ 613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 *u32; 614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *u; 615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t q, r; 617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u32=UCM_GET_CODE_POINTS(table, m); 619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u=(UChar *)u32; /* destructive in-place recoding */ 620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(r=2, q=1; q<m->uLen; ++q) { 621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=u32[q]; 622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_APPEND_UNSAFE(u, r, c); 623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* counts the first code point always at 2 - the first 16-bit unit is at 16-bit index 2 */ 626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m->uLen=(int8_t)r; 627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return j; 632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic uint32_t 635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetFromUBytesValue(CnvExtData *extData, UCMTable *table, UCMapping *m) { 636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *bytes, *resultBytes; 637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t value; 638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t u16Length, ratio; 639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(m->f==2) { 641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * no mapping, <subchar1> preferred 643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * no need to count in statistics because the subchars are already 645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * counted for maxOutBytes and maxBytesPerUChar in UConverterStaticData, 646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and this non-mapping does not count for maxInUChars which are always 647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * trivially at least two if counting unmappable supplementary code points 648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return UCNV_EXT_FROM_U_SUBCHAR1; 650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bytes=UCM_GET_BYTES(table, m); 653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=0; 654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(m->bLen) { 655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1..3: store the bytes in the value word 
*/ 656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 3: 657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=((uint32_t)*bytes++)<<16; 658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 2: 659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value|=((uint32_t)*bytes++)<<8; 660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 1: 661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value|=*bytes; 662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the parser enforces m->bLen<=UCNV_EXT_MAX_BYTES */ 665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* store the bytes in fromUBytes[] and the index in the value word */ 666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=(uint32_t)utm_countItems(extData->fromUBytes); 667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru resultBytes=utm_allocN(extData->fromUBytes, m->bLen); 668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(resultBytes, bytes, m->bLen); 669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value|=(uint32_t)m->bLen<<UCNV_EXT_FROM_U_LENGTH_SHIFT; 672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(m->f==0) { 673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value|=UCNV_EXT_FROM_U_ROUNDTRIP_FLAG; 674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* calculate the real UTF-16 length (see recoding in prepareFromUMappings()) */ 677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(m->uLen==1) { 
678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u16Length=U16_LENGTH(m->u); 679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u16Length=U16_LENGTH(UCM_GET_CODE_POINTS(table, m)[0])+(m->uLen-2); 681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* update statistics */ 684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(u16Length>extData->maxInUChars) { 685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->maxInUChars=u16Length; 686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(m->bLen>extData->maxOutBytes) { 688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->maxOutBytes=m->bLen; 689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ratio=(m->bLen+(u16Length-1))/u16Length; 692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(ratio>extData->maxBytesPerUChar) { 693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->maxBytesPerUChar=ratio; 694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return value; 697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * works like generateToUTable(), except that the 701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * output section consists of two arrays, one for input UChars 
and one 702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * for result values 703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * also, fromUTable sections are always stored in a compact form for 705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * access via binary search 706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool 708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugenerateFromUTable(CnvExtData *extData, UCMTable *table, 709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t start, int32_t limit, int32_t unitIndex, 710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t defaultValue) { 711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCMapping *mappings, *m; 712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *map; 713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, j, uniqueCount, count, subStart, subLimit; 714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *uchars; 716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 low, high, prev; 717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *sectionUChars; 719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t *sectionValues; 720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mappings=table->mappings; 722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru map=table->reverseMap; 723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* step 1: examine the input units; set low, high, uniqueCount */ 
725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=mappings+map[start]; 726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uchars=(UChar *)UCM_GET_CODE_POINTS(table, m); 727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru low=uchars[unitIndex]; 728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uniqueCount=1; 729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=high=low; 731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=start+1; i<limit; ++i) { 732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=mappings+map[i]; 733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uchars=(UChar *)UCM_GET_CODE_POINTS(table, m); 734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru high=uchars[unitIndex]; 735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(high!=prev) { 737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=high; 738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++uniqueCount; 739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* step 2: allocate the section; set count, section */ 743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the fromUTable always stores for access via binary search */ 744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=uniqueCount; 745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* allocate the section: 1 entry for the header + count for the items */ 747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sectionUChars=(UChar *)utm_allocN(extData->fromUTableUChars, 1+count); 
748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sectionValues=(uint32_t *)utm_allocN(extData->fromUTableValues, 1+count); 749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the section header */ 751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *sectionUChars++=(UChar)count; 752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *sectionValues++=defaultValue; 753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* step 3: write temporary section table with subsection starts */ 755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=low-1; /* just before low to prevent empty subsections before low */ 756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru j=0; /* section table index */ 757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=start; i<limit; ++i) { 758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=mappings+map[i]; 759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uchars=(UChar *)UCM_GET_CODE_POINTS(table, m); 760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru high=uchars[unitIndex]; 761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(high!=prev) { 763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* start of a new subsection for unit high */ 764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=high; 765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the entry with the subsection start */ 767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sectionUChars[j]=(UChar)high; 768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sectionValues[j]=(uint32_t)i; 769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru ++j; 770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* assert(j==count) */ 773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* step 4: recurse and write results */ 775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subLimit=(int32_t)(sectionValues[0]); 776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(j=0; j<count; ++j) { 777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subStart=subLimit; 778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subLimit= (j+1)<count ? (int32_t)(sectionValues[j+1]) : limit; 779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* see if there is exactly one input unit sequence of length unitIndex+1 */ 781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru defaultValue=0; 782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=mappings+map[subStart]; 783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(m->uLen==unitIndex+1) { 784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* do not include this in generateToUTable() */ 785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++subStart; 786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(subStart<subLimit && mappings[map[subStart]].uLen==unitIndex+1) { 788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* print error for multiple same-input-sequence mappings */ 789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "error: multiple mappings from same Unicode code points\n"); 790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucm_printMapping(table, m, stderr); 
791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucm_printMapping(table, mappings+map[subStart], stderr); 792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru defaultValue=getFromUBytesValue(extData, table, m); 796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(subStart==subLimit) { 799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the result for the input sequence ending here */ 800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sectionValues[j]=defaultValue; 801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the index to the subsection table */ 803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sectionValues[j]=(uint32_t)utm_countItems(extData->fromUTableValues); 804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* recurse */ 806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!generateFromUTable(extData, table, subStart, subLimit, unitIndex+1, defaultValue)) { 807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 809ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 
815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * add entries to the fromUnicode trie, 816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * assume to be called with code points in ascending order 817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and use that to build the trie in precompacted form 818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruaddFromUTrieEntry(CnvExtData *extData, UChar32 c, uint32_t value) { 821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i1, i2, i3, i3b, nextOffset, min, newBlock; 822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(value==0) { 824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 825ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 826ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 827ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 828ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * compute the index for each stage, 829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * allocate a stage block if necessary, 830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and write the stage value 831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i1=c>>10; 833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(i1>=extData->stage1Top) { 834ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage1Top=i1+1; 835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru nextOffset=(c>>4)&0x3f; 838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(extData->stage1[i1]==0) { 840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* allocate another block in stage 2; overlap with the previous block */ 841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru newBlock=extData->stage2Top; 842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru min=newBlock-nextOffset; /* minimum block start with overlap */ 843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(min<newBlock && extData->stage2[newBlock-1]==0) { 844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --newBlock; 845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage1[i1]=(uint16_t)newBlock; 848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage2Top=newBlock+MBCS_STAGE_2_BLOCK_SIZE; 849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(extData->stage2Top>LENGTHOF(extData->stage2)) { 850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "error: too many stage 2 entries at U+%04x\n", (int)c); 851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_MEMORY_ALLOCATION_ERROR); 852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i2=extData->stage1[i1]+nextOffset; 856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru nextOffset=c&0xf; 857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(extData->stage2[i2]==0) { 859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* allocate another block in stage 3; overlap with the previous block */ 
860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru newBlock=extData->stage3Top; 861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru min=newBlock-nextOffset; /* minimum block start with overlap */ 862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(min<newBlock && extData->stage3[newBlock-1]==0) { 863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --newBlock; 864ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* round up to a multiple of stage 3 granularity >1 (similar to utrie.c) */ 867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru newBlock=(newBlock+(UCNV_EXT_STAGE_3_GRANULARITY-1))&~(UCNV_EXT_STAGE_3_GRANULARITY-1); 868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage2[i2]=(uint16_t)(newBlock>>UCNV_EXT_STAGE_2_LEFT_SHIFT); 869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage3Top=newBlock+MBCS_STAGE_3_BLOCK_SIZE; 871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(extData->stage3Top>LENGTHOF(extData->stage3)) { 872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "error: too many stage 3 entries at U+%04x\n", (int)c); 873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_MEMORY_ALLOCATION_ERROR); 874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i3=((int32_t)extData->stage2[i2]<<UCNV_EXT_STAGE_2_LEFT_SHIFT)+nextOffset; 878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * assume extData->stage3[i3]==0 because we get 
880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * code points in strictly ascending order 881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 883ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(value==UCNV_EXT_FROM_U_SUBCHAR1) { 884ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* <subchar1> SUB mapping, see getFromUBytesValue() and prepareFromUMappings() */ 885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage3[i3]=1; 886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * precompaction is not optimal for <subchar1> |2 mappings because 889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * stage3 values for them are all the same, unlike for other mappings 890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * which all have unique values; 891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * use a simple compaction of reusing a whole block filled with these 892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * mappings 893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* is the entire block filled with <subchar1> |2 mappings? 
*/ 896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(nextOffset==MBCS_STAGE_3_BLOCK_SIZE-1) { 897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(min=i3-nextOffset; 898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru min<i3 && extData->stage3[min]==1; 899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++min) {} 900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(min==i3) { 902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the entire block is filled with these mappings */ 903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(extData->stage3Sub1Block!=0) { 904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* point to the previous such block and remove this block from stage3 */ 905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage2[i2]=extData->stage3Sub1Block; 906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage3Top-=MBCS_STAGE_3_BLOCK_SIZE; 907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memset(extData->stage3+extData->stage3Top, 0, MBCS_STAGE_3_BLOCK_SIZE*2); 908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* remember this block's stage2 entry */ 910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage3Sub1Block=extData->stage2[i2]; 911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((i3b=extData->stage3bTop++)>=LENGTHOF(extData->stage3b)) { 916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "error: too many stage 3b entries at U+%04x\n", 
(int)c); 917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_MEMORY_ALLOCATION_ERROR); 918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* roundtrip or fallback mapping */ 921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage3[i3]=(uint16_t)i3b; 922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage3b[i3b]=value; 923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool 927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugenerateFromUTrie(CnvExtData *extData, UCMTable *table, int32_t mapLength) { 928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCMapping *mappings, *m; 929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *map; 930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t value; 931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t subStart, subLimit; 932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 *codePoints; 934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c, next; 935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(mapLength==0) { 937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mappings=table->mappings; 941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru map=table->reverseMap; 
942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * iterate over same-initial-code point mappings, 945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * enter the initial code point into the trie, 946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and start a recursion on the corresponding mappings section 947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * with generateFromUTable() 948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 949ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=mappings+map[0]; 950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru codePoints=UCM_GET_CODE_POINTS(table, m); 951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru next=codePoints[0]; 952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subLimit=0; 953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(subLimit<mapLength) { 954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get a new subsection of mappings starting with the same code point */ 955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subStart=subLimit; 956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=next; 957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(next==c && ++subLimit<mapLength) { 958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=mappings+map[subLimit]; 959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru codePoints=UCM_GET_CODE_POINTS(table, m); 960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru next=codePoints[0]; 961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 963ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 964ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * compute the value for 
this code point; 965ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * if there is a mapping for this code point alone, it is at subStart 966ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * because the table is sorted lexically 967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=0; 969ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=mappings+map[subStart]; 970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru codePoints=UCM_GET_CODE_POINTS(table, m); 971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(m->uLen==1) { 972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* do not include this in generateFromUTable() */ 973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++subStart; 974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 975ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(subStart<subLimit && mappings[map[subStart]].uLen==1) { 976ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* print error for multiple same-input-sequence mappings */ 977ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "error: multiple mappings from same Unicode code points\n"); 978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucm_printMapping(table, m, stderr); 979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucm_printMapping(table, mappings+map[subStart], stderr); 980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=getFromUBytesValue(extData, table, m); 984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru if(subStart==subLimit) { 987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the result for this one code point */ 988ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru addFromUTrieEntry(extData, c, value); 989ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 990ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the index to the subsection table */ 991ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru addFromUTrieEntry(extData, c, (uint32_t)utm_countItems(extData->fromUTableValues)); 992ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* recurse, starting from 16-bit-unit index 2, the first 16-bit unit after c */ 994ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!generateFromUTable(extData, table, subStart, subLimit, 2, value)) { 995ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 996ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 997ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 998ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 999ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 1000ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1001ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1002ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 1003ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Generate the fromU data structures from the input table. 1004ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The input table must be sorted, and all precision flags must be 0..3. 1005ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This function will modify the table's reverseMap. 
1006ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1007ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool 1008ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerumakeFromUTable(CnvExtData *extData, UCMTable *table) { 1009ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint16_t *stage1; 1010ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, stage1Top, fromUCount; 1011ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1012ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fromUCount=prepareFromUMappings(table); 1013ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1014ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->fromUTableUChars=utm_open("cnv extension fromUTableUChars", 0x10000, UCNV_EXT_FROM_U_DATA_MASK+1, 2); 1015ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->fromUTableValues=utm_open("cnv extension fromUTableValues", 0x10000, UCNV_EXT_FROM_U_DATA_MASK+1, 4); 1016ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->fromUBytes=utm_open("cnv extension fromUBytes", 0x10000, UCNV_EXT_FROM_U_DATA_MASK+1, 1); 1017ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1018ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* allocate all-unassigned stage blocks */ 1019ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage2Top=MBCS_STAGE_2_FIRST_ASSIGNED; 1020ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage3Top=MBCS_STAGE_3_FIRST_ASSIGNED; 1021ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1022ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1023ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * stage 3b stores only unique values, and in 1024ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * index 0: 0 for "no mapping" 1025ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * index 1: "no mapping" with preference 
for <subchar1> rather than <subchar> 1026ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1027ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage3b[1]=UCNV_EXT_FROM_U_SUBCHAR1; 1028ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage3bTop=2; 1029ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1030ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* allocate the first entry in the fromUTable because index 0 means "no result" */ 1031ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utm_alloc(extData->fromUTableUChars); 1032ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utm_alloc(extData->fromUTableValues); 1033ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1034ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!generateFromUTrie(extData, table, fromUCount)) { 1035ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 1036ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1037ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1038ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1039ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * offset the stage 1 trie entries by stage1Top because they will 1040ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * be stored in a single array 1041ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1042ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stage1=extData->stage1; 1043ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stage1Top=extData->stage1Top; 1044ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<stage1Top; ++i) { 1045ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stage1[i]=(uint16_t)(stage1[i]+stage1Top); 1046ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1047ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
1048ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 1049ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1050ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1051ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* -------------------------------------------------------------------------- */ 1052ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1053ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool 1054ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCnvExtAddTable(NewConverter *cnvData, UCMTable *table, UConverterStaticData *staticData) { 1055ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CnvExtData *extData; 1056ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1057ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(table->unicodeMask&UCNV_HAS_SURROGATES) { 1058ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "error: contains mappings for surrogate code points\n"); 1059ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 1060ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1061ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1062ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru staticData->conversionType=UCNV_MBCS; 1063ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1064ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData=(CnvExtData *)cnvData; 1065ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1066ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1067ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * assume that the table is sorted 1068ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1069ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * call the functions in this order because 1070ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * makeToUTable() modifies the original 
reverseMap, 1071ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * makeFromUTable() writes a whole new mapping into reverseMap 1072ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1073ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 1074ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru makeToUTable(extData, table) && 1075ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru makeFromUTable(extData, table); 1076ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1077