1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 48393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius* Copyright (C) 2003-2013, International Business Machines 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* file name: gencnvex.c 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* encoding: US-ASCII 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* tab size: 8 (not used) 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* indentation:4 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created on: 2003oct12 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created by: Markus W. 
Scherer 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdio.h> 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ustring.h" 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cstring.h" 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h" 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_cnv.h" 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnvmbcs.h" 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "toolutil.h" 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unewdata.h" 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucm.h" 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "makeconv.h" 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "genmbcs.h" 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCnvExtClose(NewConverter *cnvData); 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCnvExtIsValid(NewConverter *cnvData, 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *bytes, int32_t length); 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCnvExtAddTable(NewConverter *cnvData, UCMTable *table, UConverterStaticData *staticData); 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic uint32_t 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCnvExtWrite(NewConverter *cnvData, const UConverterStaticData *staticData, 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UNewDataMemory *pData, int32_t tableType); 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef struct CnvExtData { 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NewConverter newConverter; 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCMFile *ucm; 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* toUnicode (state table in ucm->states) */ 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UToolMemory *toUTable, *toUUChars; 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fromUnicode */ 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UToolMemory *fromUTableUChars, *fromUTableValues, *fromUBytes; 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint16_t stage1[MBCS_STAGE_1_SIZE]; 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint16_t stage2[MBCS_STAGE_2_SIZE]; 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint16_t stage3[0x10000<<UCNV_EXT_STAGE_2_LEFT_SHIFT]; /* 0x10000 because of 16-bit stage 2/3 indexes */ 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t 
stage3b[0x10000]; 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t stage1Top, stage2Top, stage3Top, stage3bTop; 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* for stage3 compaction of <subchar1> |2 mappings */ 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint16_t stage3Sub1Block; 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* statistics */ 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru maxInBytes, maxOutBytes, maxBytesPerUChar, 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru maxInUChars, maxOutUChars, maxUCharsPerByte; 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} CnvExtData; 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruNewConverter * 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCnvExtOpen(UCMFile *ucm) { 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CnvExtData *extData; 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData=(CnvExtData *)uprv_malloc(sizeof(CnvExtData)); 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(extData==NULL) { 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("out of memory\n"); 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_MEMORY_ALLOCATION_ERROR); 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memset(extData, 0, sizeof(CnvExtData)); 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
extData->ucm=ucm; /* aliased, not owned */ 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->newConverter.close=CnvExtClose; 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->newConverter.isValid=CnvExtIsValid; 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->newConverter.addTable=CnvExtAddTable; 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->newConverter.write=CnvExtWrite; 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return &extData->newConverter; 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCnvExtClose(NewConverter *cnvData) { 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CnvExtData *extData=(CnvExtData *)cnvData; 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(extData!=NULL) { 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utm_close(extData->toUTable); 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utm_close(extData->toUUChars); 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utm_close(extData->fromUTableUChars); 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utm_close(extData->fromUTableValues); 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utm_close(extData->fromUBytes); 10354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius uprv_free(extData); 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* we do not expect this to be called */ 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Querustatic UBool 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCnvExtIsValid(NewConverter *cnvData, 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *bytes, int32_t length) { 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic uint32_t 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCnvExtWrite(NewConverter *cnvData, const UConverterStaticData *staticData, 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UNewDataMemory *pData, int32_t tableType) { 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CnvExtData *extData=(CnvExtData *)cnvData; 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length, top, headerSize; 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t indexes[UCNV_EXT_INDEXES_MIN_LENGTH]={ 0 }; 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(tableType&TABLE_BASE) { 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru headerSize=0; 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _MBCSHeader header={ { 0, 0, 0, 0 }, 0, 0, 0, 0, 0, 0, 0 }; 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the header and base table name for an extension-only table */ 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=(int32_t)uprv_strlen(extData->ucm->baseName)+1; 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(length&3) { 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* add 
padding */ 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->ucm->baseName[length++]=0; 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 134c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru headerSize=MBCS_HEADER_V4_LENGTH*4+length; 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fill the header */ 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru header.version[0]=4; 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru header.version[1]=2; 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru header.flags=(uint32_t)((headerSize<<8)|MBCS_OUTPUT_EXT_ONLY); 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the header and the base table name */ 142c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru udata_writeBlock(pData, &header, MBCS_HEADER_V4_LENGTH*4); 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, extData->ucm->baseName, length); 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fill indexes[] - offsets/indexes are in units of the target array */ 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru top=0; 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_INDEXES_LENGTH]=length=UCNV_EXT_INDEXES_MIN_LENGTH; 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru top+=length*4; 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_TO_U_INDEX]=top; 
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_TO_U_LENGTH]=length=utm_countItems(extData->toUTable); 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru top+=length*4; 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_TO_U_UCHARS_INDEX]=top; 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_TO_U_UCHARS_LENGTH]=length=utm_countItems(extData->toUUChars); 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru top+=length*2; 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FROM_U_UCHARS_INDEX]=top; 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=utm_countItems(extData->fromUTableUChars); 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru top+=length*2; 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(top&3) { 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* add padding */ 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *((UChar *)utm_alloc(extData->fromUTableUChars))=0; 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *((uint32_t *)utm_alloc(extData->fromUTableValues))=0; 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++length; 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru top+=2; 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FROM_U_LENGTH]=length; 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FROM_U_VALUES_INDEX]=top; 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru top+=length*4; 
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FROM_U_BYTES_INDEX]=top; 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=utm_countItems(extData->fromUBytes); 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru top+=length; 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(top&1) { 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* add padding */ 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *((uint8_t *)utm_alloc(extData->fromUBytes))=0; 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++length; 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++top; 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FROM_U_BYTES_LENGTH]=length; 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]=top; 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FROM_U_STAGE_1_LENGTH]=length=extData->stage1Top; 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]=length+=extData->stage2Top; 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru top+=length*2; 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]=top; 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=extData->stage3Top; 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru top+=length*2; 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(top&3) { 
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* add padding */ 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage3[extData->stage3Top++]=0; 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++length; 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru top+=2; 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]=length; 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]=top; 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]=length=extData->stage3bTop; 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru top+=length*4; 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_SIZE]=top; 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* statistics */ 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_COUNT_BYTES]= 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (extData->maxInBytes<<16)| 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (extData->maxOutBytes<<8)| 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->maxBytesPerUChar; 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_COUNT_UCHARS]= 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (extData->maxInUChars<<16)| 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (extData->maxOutUChars<<8)| 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->maxUCharsPerByte; 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UCNV_EXT_FLAGS]=extData->ucm->ext->unicodeMask; 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the extension data */ 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, indexes, sizeof(indexes)); 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, utm_getStart(extData->toUTable), indexes[UCNV_EXT_TO_U_LENGTH]*4); 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, utm_getStart(extData->toUUChars), indexes[UCNV_EXT_TO_U_UCHARS_LENGTH]*2); 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, utm_getStart(extData->fromUTableUChars), indexes[UCNV_EXT_FROM_U_LENGTH]*2); 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, utm_getStart(extData->fromUTableValues), indexes[UCNV_EXT_FROM_U_LENGTH]*4); 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, utm_getStart(extData->fromUBytes), indexes[UCNV_EXT_FROM_U_BYTES_LENGTH]); 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, extData->stage1, extData->stage1Top*2); 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, extData->stage2, extData->stage2Top*2); 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, extData->stage3, extData->stage3Top*2); 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, extData->stage3b, extData->stage3bTop*4); 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if 0 
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, j; 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=extData->stage1Top; 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("\nstage1[%x]:\n", length); 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<length; ++i) { 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(extData->stage1[i]!=length) { 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("stage1[%04x]=%04x\n", i, extData->stage1[i]); 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru j=length; 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=extData->stage2Top; 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("\nstage2[%x]:\n", length); 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<length; ++j, ++i) { 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(extData->stage2[i]!=0) { 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("stage12[%04x]=%04x\n", j, extData->stage2[i]); 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=extData->stage3Top; 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("\nstage3[%x]:\n", length); 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<length; ++i) { 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(extData->stage3[i]!=0) { 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("stage3[%04x]=%04x\n", i, extData->stage3[i]); 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=extData->stage3bTop; 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("\nstage3b[%x]:\n", length); 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<length; ++i) { 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(extData->stage3b[i]!=0) { 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("stage3b[%04x]=%08x\n", i, extData->stage3b[i]); 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(VERBOSE) { 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("size of extension data: %ld\n", (long)top); 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* return the number of bytes that should have been written */ 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (uint32_t)(headerSize+top); 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 
287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* to Unicode --------------------------------------------------------------- */ 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Remove fromUnicode fallbacks and SUB mappings which are irrelevant for 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the toUnicode table. 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This includes mappings with MBCS_FROM_U_EXT_FLAG which were suitable 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * for the base toUnicode table but not for the base fromUnicode table. 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The table must be sorted. 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Modifies previous data in the reverseMap. 
297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerureduceToUMappings(UCMTable *table) { 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCMapping *mappings; 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *map; 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, j, count; 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t flag; 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mappings=table->mappings; 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru map=table->reverseMap; 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=table->mappingsLength; 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* leave the map alone for the initial mappings with desired flags */ 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=j=0; i<count; ++i) { 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru flag=mappings[map[i]].f; 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(flag!=0 && flag!=3) { 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reduce from here to the rest */ 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(j=i; i<count; ++i) { 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru flag=mappings[map[i]].f; 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(flag==0 || flag==3) { 
321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru map[j++]=map[i]; 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return j; 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic uint32_t 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetToUnicodeValue(CnvExtData *extData, UCMTable *table, UCMapping *m) { 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 *u32; 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *u; 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t value; 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t u16Length, ratio; 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode errorCode; 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the Unicode result code point or string index */ 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(m->uLen==1) { 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u16Length=U16_LENGTH(m->u); 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=(uint32_t)(UCNV_EXT_TO_U_MIN_CODE_POINT+m->u); 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the parser enforces m->uLen<=UCNV_EXT_MAX_UCHARS */ 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get the result code point string and its 16-bit string length */ 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
u32=UCM_GET_CODE_POINTS(table, m); 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_strFromUTF32(NULL, 0, &u16Length, u32, m->uLen, &errorCode); 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) { 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(errorCode); 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* allocate it and put its length and index into the value */ 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value= 35354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius (((uint32_t)u16Length+UCNV_EXT_TO_U_LENGTH_OFFSET)<<UCNV_EXT_TO_U_LENGTH_SHIFT)| 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((uint32_t)utm_countItems(extData->toUUChars)); 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u=utm_allocN(extData->toUUChars, u16Length); 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the result 16-bit string */ 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_strFromUTF32(u, u16Length, NULL, u32, m->uLen, &errorCode); 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) { 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(errorCode); 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(m->f==0) { 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
value|=UCNV_EXT_TO_U_ROUNDTRIP_FLAG; 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* update statistics */ 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(m->bLen>extData->maxInBytes) { 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->maxInBytes=m->bLen; 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(u16Length>extData->maxOutUChars) { 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->maxOutUChars=u16Length; 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ratio=(u16Length+(m->bLen-1))/m->bLen; 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(ratio>extData->maxUCharsPerByte) { 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->maxUCharsPerByte=ratio; 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return value; 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Recursive toUTable generator core function. 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Preconditions: 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - start<limit (There is at least one mapping.) 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - The mappings are sorted lexically. (Access is through the reverseMap.) 
389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - All mappings between start and limit have input sequences that share 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the same prefix of unitIndex length, and therefore all of these sequences 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * are at least unitIndex+1 long. 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - There are only relevant mappings available through the reverseMap, 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * see reduceToUMappings(). 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * One function invocation generates one section table. 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Steps: 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1. Count the number of unique unit values and get the low/high unit values 399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * that occur at unitIndex. 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 2. Allocate the section table with possible optimization for linear access. 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 3. Write temporary version of the section table with start indexes of 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * subsections, each corresponding to one unit value at unitIndex. 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 4. 
Iterate through the table once more, and depending on the subsection length: 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 0: write 0 as a result value (unused byte in linear-access section table) 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * >0: if there is one mapping with an input unit sequence of unitIndex+1 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * then defaultValue=compute the mapping result for this whole sequence 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * else defaultValue=0 408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * recurse into the subsection 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugenerateToUTable(CnvExtData *extData, UCMTable *table, 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t start, int32_t limit, int32_t unitIndex, 414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t defaultValue) { 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCMapping *mappings, *m; 416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *map; 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, j, uniqueCount, count, subStart, subLimit; 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *bytes; 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t low, high, prev; 421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t *section; 423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mappings=table->mappings; 
425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru map=table->reverseMap; 426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* step 1: examine the input units; set low, high, uniqueCount */ 428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=mappings+map[start]; 429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bytes=UCM_GET_BYTES(table, m); 430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru low=bytes[unitIndex]; 431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uniqueCount=1; 432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=high=low; 434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=start+1; i<limit; ++i) { 435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=mappings+map[i]; 436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bytes=UCM_GET_BYTES(table, m); 437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru high=bytes[unitIndex]; 438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(high!=prev) { 440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=high; 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++uniqueCount; 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* step 2: allocate the section; set count, section */ 446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=(high-low)+1; 447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count<0x100 && (unitIndex==0 || uniqueCount>=(3*count)/4)) { 448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 
449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * for the root table and for fairly full tables: 450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * allocate for direct, linear array access 451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * by keeping count, to write an entry for each unit value 452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * from low to high 453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * exception: use a compact table if count==0x100 because 454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * that cannot be encoded in the length byte 455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=uniqueCount; 458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count>=0x100) { 461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "error: toUnicode extension table section overflow: %ld section entries\n", (long)count); 462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* allocate the section: 1 entry for the header + count for the items */ 466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru section=(uint32_t *)utm_allocN(extData->toUTable, 1+count); 467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the section header */ 469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *section++=((uint32_t)count<<UCNV_EXT_TO_U_BYTE_SHIFT)|defaultValue; 
470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* step 3: write temporary section table with subsection starts */ 472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=low-1; /* just before low to prevent empty subsections before low */ 473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru j=0; /* section table index */ 474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=start; i<limit; ++i) { 475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=mappings+map[i]; 476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bytes=UCM_GET_BYTES(table, m); 477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru high=bytes[unitIndex]; 478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(high!=prev) { 480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* start of a new subsection for unit high */ 481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count>uniqueCount) { 482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write empty subsections for unused units in a linear table */ 483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(++prev<high) { 484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru section[j++]=((uint32_t)prev<<UCNV_EXT_TO_U_BYTE_SHIFT)|(uint32_t)i; 485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=high; 488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the entry with the subsection start */ 491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
section[j++]=((uint32_t)high<<UCNV_EXT_TO_U_BYTE_SHIFT)|(uint32_t)i; 492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* assert(j==count) */ 495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* step 4: recurse and write results */ 497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subLimit=UCNV_EXT_TO_U_GET_VALUE(section[0]); 498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(j=0; j<count; ++j) { 499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subStart=subLimit; 500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subLimit= (j+1)<count ? UCNV_EXT_TO_U_GET_VALUE(section[j+1]) : limit; 501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* remove the subStart temporary value */ 503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru section[j]&=~UCNV_EXT_TO_U_VALUE_MASK; 504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(subStart==subLimit) { 506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* leave the value zero: empty subsection for unused unit in a linear table */ 507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* see if there is exactly one input unit sequence of length unitIndex+1 */ 511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru defaultValue=0; 512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=mappings+map[subStart]; 513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
if(m->bLen==unitIndex+1) { 514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* do not include this in generateToUTable() */ 515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++subStart; 516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(subStart<subLimit && mappings[map[subStart]].bLen==unitIndex+1) { 518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* print error for multiple same-input-sequence mappings */ 519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "error: multiple mappings from same bytes\n"); 520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucm_printMapping(table, m, stderr); 521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucm_printMapping(table, mappings+map[subStart], stderr); 522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru defaultValue=getToUnicodeValue(extData, table, m); 526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(subStart==subLimit) { 529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the result for the input sequence ending here */ 530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru section[j]|=defaultValue; 531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the index to the subsection table */ 533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru section[j]|=(uint32_t)utm_countItems(extData->toUTable); 534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* recurse */ 536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!generateToUTable(extData, table, subStart, subLimit, unitIndex+1, defaultValue)) { 537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Generate the toUTable and toUUChars from the input table. 546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The input table must be sorted, and all precision flags must be 0..3. 547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This function will modify the table's reverseMap. 
548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool 550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerumakeToUTable(CnvExtData *extData, UCMTable *table) { 551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t toUCount; 552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toUCount=reduceToUMappings(table); 554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->toUTable=utm_open("cnv extension toUTable", 0x10000, UCNV_EXT_TO_U_MIN_CODE_POINT, 4); 556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->toUUChars=utm_open("cnv extension toUUChars", 0x10000, UCNV_EXT_TO_U_INDEX_MASK+1, 2); 557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return generateToUTable(extData, table, 0, toUCount, 0, 0); 559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* from Unicode ------------------------------------------------------------- */ 562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * preprocessing: 565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * rebuild reverseMap with mapping indexes for mappings relevant for from Unicode 566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * change each Unicode string to encode all but the first code point in 16-bit form 567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * generation: 
 *   for each unique code point
 *     write an entry in the 3-stage trie
 *     check that there is only one single-code point sequence
 *     start recursion for following 16-bit input units
 */

/*
 * Remove toUnicode fallbacks and non-<subchar1> SUB mappings
 * which are irrelevant for the fromUnicode extension table.
 * Remove MBCS_FROM_U_EXT_FLAG bits.
 * Overwrite the reverseMap with an index array to the relevant mappings.
 * Modify the code point sequences to a generator-friendly format where
 * the first code point remains unchanged but the following are recoded
 * into 16-bit Unicode string form.
 * The table must be sorted.
 * Destroys previous data in the reverseMap.
585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t 587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruprepareFromUMappings(UCMTable *table) { 588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCMapping *mappings, *m; 589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *map; 590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, j, count; 591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t flag; 592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mappings=table->mappings; 594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru map=table->reverseMap; 595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=table->mappingsLength; 596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * we do not go through the map on input because the mappings are 599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * sorted lexically 600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=mappings; 602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=j=0; i<count; ++m, ++i) { 604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru flag=m->f; 605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(flag>=0) { 606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru flag&=MBCS_FROM_U_EXT_MASK; 607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m->f=flag; 608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 6098393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if(flag==0 || flag==1 || (flag==2 && 
m->bLen==1) || flag==4) { 610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru map[j++]=i; 611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(m->uLen>1) { 613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* recode all but the first code point to 16-bit Unicode */ 614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 *u32; 615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *u; 616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t q, r; 618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u32=UCM_GET_CODE_POINTS(table, m); 620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u=(UChar *)u32; /* destructive in-place recoding */ 621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(r=2, q=1; q<m->uLen; ++q) { 622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=u32[q]; 623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_APPEND_UNSAFE(u, r, c); 624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* counts the first code point always at 2 - the first 16-bit unit is at 16-bit index 2 */ 627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m->uLen=(int8_t)r; 628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return j; 633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic uint32_t 636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetFromUBytesValue(CnvExtData *extData, UCMTable *table, UCMapping *m) { 637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *bytes, *resultBytes; 638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t value; 639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t u16Length, ratio; 640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(m->f==2) { 642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * no mapping, <subchar1> preferred 644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * no need to count in statistics because the subchars are already 646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * counted for maxOutBytes and maxBytesPerUChar in UConverterStaticData, 647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and this non-mapping does not count for maxInUChars which are always 648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * trivially at least two if counting unmappable supplementary code points 649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return UCNV_EXT_FROM_U_SUBCHAR1; 651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru bytes=UCM_GET_BYTES(table, m); 654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=0; 655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(m->bLen) { 656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1..3: store the bytes in the value word 
*/ 657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 3: 658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=((uint32_t)*bytes++)<<16; 659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 2: 660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value|=((uint32_t)*bytes++)<<8; 661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 1: 662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value|=*bytes; 663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the parser enforces m->bLen<=UCNV_EXT_MAX_BYTES */ 666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* store the bytes in fromUBytes[] and the index in the value word */ 667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=(uint32_t)utm_countItems(extData->fromUBytes); 668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru resultBytes=utm_allocN(extData->fromUBytes, m->bLen); 669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(resultBytes, bytes, m->bLen); 670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value|=(uint32_t)m->bLen<<UCNV_EXT_FROM_U_LENGTH_SHIFT; 673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(m->f==0) { 674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value|=UCNV_EXT_FROM_U_ROUNDTRIP_FLAG; 6758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } else if(m->f==4) { 6768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius value|=UCNV_EXT_FROM_U_GOOD_ONE_WAY_FLAG; 677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
/* calculate the real UTF-16 length (see recoding in prepareFromUMappings()) */ 680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(m->uLen==1) { 681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u16Length=U16_LENGTH(m->u); 682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u16Length=U16_LENGTH(UCM_GET_CODE_POINTS(table, m)[0])+(m->uLen-2); 684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* update statistics */ 687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(u16Length>extData->maxInUChars) { 688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->maxInUChars=u16Length; 689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(m->bLen>extData->maxOutBytes) { 691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->maxOutBytes=m->bLen; 692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ratio=(m->bLen+(u16Length-1))/u16Length; 695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(ratio>extData->maxBytesPerUChar) { 696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->maxBytesPerUChar=ratio; 697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return value; 700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * works like 
generateToUTable(), except that the 704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * output section consists of two arrays, one for input UChars and one 705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * for result values 706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * also, fromUTable sections are always stored in a compact form for 708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * access via binary search 709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool 711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugenerateFromUTable(CnvExtData *extData, UCMTable *table, 712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t start, int32_t limit, int32_t unitIndex, 713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t defaultValue) { 714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCMapping *mappings, *m; 715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *map; 716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, j, uniqueCount, count, subStart, subLimit; 717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *uchars; 719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 low, high, prev; 720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar *sectionUChars; 722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t *sectionValues; 723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mappings=table->mappings; 725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru map=table->reverseMap; 
726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* step 1: examine the input units; set low, high, uniqueCount */ 728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=mappings+map[start]; 729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uchars=(UChar *)UCM_GET_CODE_POINTS(table, m); 730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru low=uchars[unitIndex]; 731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uniqueCount=1; 732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=high=low; 734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=start+1; i<limit; ++i) { 735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=mappings+map[i]; 736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uchars=(UChar *)UCM_GET_CODE_POINTS(table, m); 737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru high=uchars[unitIndex]; 738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(high!=prev) { 740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=high; 741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++uniqueCount; 742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* step 2: allocate the section; set count, section */ 746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the fromUTable always stores for access via binary search */ 747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=uniqueCount; 748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* allocate the 
section: 1 entry for the header + count for the items */ 750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sectionUChars=(UChar *)utm_allocN(extData->fromUTableUChars, 1+count); 751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sectionValues=(uint32_t *)utm_allocN(extData->fromUTableValues, 1+count); 752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the section header */ 754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *sectionUChars++=(UChar)count; 755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *sectionValues++=defaultValue; 756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* step 3: write temporary section table with subsection starts */ 758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=low-1; /* just before low to prevent empty subsections before low */ 759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru j=0; /* section table index */ 760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=start; i<limit; ++i) { 761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=mappings+map[i]; 762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uchars=(UChar *)UCM_GET_CODE_POINTS(table, m); 763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru high=uchars[unitIndex]; 764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(high!=prev) { 766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* start of a new subsection for unit high */ 767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru prev=high; 768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the entry with the subsection start */ 
770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sectionUChars[j]=(UChar)high; 771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sectionValues[j]=(uint32_t)i; 772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++j; 773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* assert(j==count) */ 776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* step 4: recurse and write results */ 778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subLimit=(int32_t)(sectionValues[0]); 779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(j=0; j<count; ++j) { 780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subStart=subLimit; 781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subLimit= (j+1)<count ? (int32_t)(sectionValues[j+1]) : limit; 782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* see if there is exactly one input unit sequence of length unitIndex+1 */ 784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru defaultValue=0; 785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=mappings+map[subStart]; 786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(m->uLen==unitIndex+1) { 787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* do not include this in generateToUTable() */ 788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++subStart; 789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(subStart<subLimit && mappings[map[subStart]].uLen==unitIndex+1) { 791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* print error for multiple same-input-sequence mappings */ 
792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "error: multiple mappings from same Unicode code points\n"); 793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucm_printMapping(table, m, stderr); 794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucm_printMapping(table, mappings+map[subStart], stderr); 795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru defaultValue=getFromUBytesValue(extData, table, m); 799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(subStart==subLimit) { 802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the result for the input sequence ending here */ 803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sectionValues[j]=defaultValue; 804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the index to the subsection table */ 806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sectionValues[j]=(uint32_t)utm_countItems(extData->fromUTableValues); 807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* recurse */ 809ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!generateFromUTable(extData, table, subStart, subLimit, unitIndex+1, defaultValue)) { 810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * add entries to the fromUnicode trie, 819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * assume to be called with code points in ascending order 820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and use that to build the trie in precompacted form 821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruaddFromUTrieEntry(CnvExtData *extData, UChar32 c, uint32_t value) { 824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i1, i2, i3, i3b, nextOffset, min, newBlock; 825ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 826ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(value==0) { 827ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 828ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * compute the index for each stage, 832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * allocate a stage block if necessary, 833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and write the stage value 834ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i1=c>>10; 836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(i1>=extData->stage1Top) { 837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage1Top=i1+1; 
838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru nextOffset=(c>>4)&0x3f; 841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(extData->stage1[i1]==0) { 843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* allocate another block in stage 2; overlap with the previous block */ 844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru newBlock=extData->stage2Top; 845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru min=newBlock-nextOffset; /* minimum block start with overlap */ 846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(min<newBlock && extData->stage2[newBlock-1]==0) { 847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --newBlock; 848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage1[i1]=(uint16_t)newBlock; 851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage2Top=newBlock+MBCS_STAGE_2_BLOCK_SIZE; 852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(extData->stage2Top>LENGTHOF(extData->stage2)) { 853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "error: too many stage 2 entries at U+%04x\n", (int)c); 854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_MEMORY_ALLOCATION_ERROR); 855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i2=extData->stage1[i1]+nextOffset; 859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru nextOffset=c&0xf; 
860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(extData->stage2[i2]==0) { 862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* allocate another block in stage 3; overlap with the previous block */ 863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru newBlock=extData->stage3Top; 864ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru min=newBlock-nextOffset; /* minimum block start with overlap */ 865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(min<newBlock && extData->stage3[newBlock-1]==0) { 866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --newBlock; 867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* round up to a multiple of stage 3 granularity >1 (similar to utrie.c) */ 870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru newBlock=(newBlock+(UCNV_EXT_STAGE_3_GRANULARITY-1))&~(UCNV_EXT_STAGE_3_GRANULARITY-1); 871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage2[i2]=(uint16_t)(newBlock>>UCNV_EXT_STAGE_2_LEFT_SHIFT); 872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage3Top=newBlock+MBCS_STAGE_3_BLOCK_SIZE; 874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(extData->stage3Top>LENGTHOF(extData->stage3)) { 875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "error: too many stage 3 entries at U+%04x\n", (int)c); 876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_MEMORY_ALLOCATION_ERROR); 877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i3=((int32_t)extData->stage2[i2]<<UCNV_EXT_STAGE_2_LEFT_SHIFT)+nextOffset; 881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * assume extData->stage3[i3]==0 because we get 883ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * code points in strictly ascending order 884ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(value==UCNV_EXT_FROM_U_SUBCHAR1) { 887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* <subchar1> SUB mapping, see getFromUBytesValue() and prepareFromUMappings() */ 888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage3[i3]=1; 889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * precompaction is not optimal for <subchar1> |2 mappings because 892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * stage3 values for them are all the same, unlike for other mappings 893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * which all have unique values; 894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * use a simple compaction of reusing a whole block filled with these 895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * mappings 896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* is the entire block filled with <subchar1> |2 mappings? 
*/ 899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(nextOffset==MBCS_STAGE_3_BLOCK_SIZE-1) { 900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(min=i3-nextOffset; 901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru min<i3 && extData->stage3[min]==1; 902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++min) {} 903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(min==i3) { 905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the entire block is filled with these mappings */ 906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(extData->stage3Sub1Block!=0) { 907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* point to the previous such block and remove this block from stage3 */ 908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage2[i2]=extData->stage3Sub1Block; 909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage3Top-=MBCS_STAGE_3_BLOCK_SIZE; 910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memset(extData->stage3+extData->stage3Top, 0, MBCS_STAGE_3_BLOCK_SIZE*2); 911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* remember this block's stage2 entry */ 913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage3Sub1Block=extData->stage2[i2]; 914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((i3b=extData->stage3bTop++)>=LENGTHOF(extData->stage3b)) { 919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "error: too many stage 3b entries at U+%04x\n", 
(int)c); 920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_MEMORY_ALLOCATION_ERROR); 921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* roundtrip or fallback mapping */ 924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage3[i3]=(uint16_t)i3b; 925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage3b[i3b]=value; 926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool 930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugenerateFromUTrie(CnvExtData *extData, UCMTable *table, int32_t mapLength) { 931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCMapping *mappings, *m; 932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t *map; 933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t value; 934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t subStart, subLimit; 935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 *codePoints; 937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c, next; 938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(mapLength==0) { 940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mappings=table->mappings; 944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru map=table->reverseMap; 
945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * iterate over same-initial-code point mappings, 948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * enter the initial code point into the trie, 949ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and start a recursion on the corresponding mappings section 950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * with generateFromUTable() 951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=mappings+map[0]; 953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru codePoints=UCM_GET_CODE_POINTS(table, m); 954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru next=codePoints[0]; 955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subLimit=0; 956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(subLimit<mapLength) { 957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* get a new subsection of mappings starting with the same code point */ 958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru subStart=subLimit; 959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=next; 960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(next==c && ++subLimit<mapLength) { 961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=mappings+map[subLimit]; 962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru codePoints=UCM_GET_CODE_POINTS(table, m); 963ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru next=codePoints[0]; 964ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 965ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 966ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * compute the value for 
this code point; 968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * if there is a mapping for this code point alone, it is at subStart 969ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * because the table is sorted lexically 970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=0; 972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru m=mappings+map[subStart]; 973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru codePoints=UCM_GET_CODE_POINTS(table, m); 974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(m->uLen==1) { 975ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* do not include this in generateFromUTable() */ 976ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++subStart; 977ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(subStart<subLimit && mappings[map[subStart]].uLen==1) { 979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* print error for multiple same-input-sequence mappings */ 980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "error: multiple mappings from same Unicode code points\n"); 981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucm_printMapping(table, m, stderr); 982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucm_printMapping(table, mappings+map[subStart], stderr); 983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=getFromUBytesValue(extData, table, m); 987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 988ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 989ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru if(subStart==subLimit) { 990ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the result for this one code point */ 991ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru addFromUTrieEntry(extData, c, value); 992ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the index to the subsection table */ 994ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru addFromUTrieEntry(extData, c, (uint32_t)utm_countItems(extData->fromUTableValues)); 995ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 996ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* recurse, starting from 16-bit-unit index 2, the first 16-bit unit after c */ 997ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!generateFromUTable(extData, table, subStart, subLimit, 2, value)) { 998ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 999ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1000ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1001ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1002ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 1003ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1004ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1005ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 1006ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Generate the fromU data structures from the input table. 1007ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The input table must be sorted, and all precision flags must be 0..3. 1008ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This function will modify the table's reverseMap. 
1009ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1010ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool 1011ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerumakeFromUTable(CnvExtData *extData, UCMTable *table) { 1012ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint16_t *stage1; 1013ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, stage1Top, fromUCount; 1014ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1015ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fromUCount=prepareFromUMappings(table); 1016ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1017ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->fromUTableUChars=utm_open("cnv extension fromUTableUChars", 0x10000, UCNV_EXT_FROM_U_DATA_MASK+1, 2); 1018ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->fromUTableValues=utm_open("cnv extension fromUTableValues", 0x10000, UCNV_EXT_FROM_U_DATA_MASK+1, 4); 1019ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->fromUBytes=utm_open("cnv extension fromUBytes", 0x10000, UCNV_EXT_FROM_U_DATA_MASK+1, 1); 1020ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1021ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* allocate all-unassigned stage blocks */ 1022ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage2Top=MBCS_STAGE_2_FIRST_ASSIGNED; 1023ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage3Top=MBCS_STAGE_3_FIRST_ASSIGNED; 1024ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1025ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1026ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * stage 3b stores only unique values, and in 1027ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * index 0: 0 for "no mapping" 1028ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * index 1: "no mapping" with preference 
for <subchar1> rather than <subchar> 1029ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1030ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage3b[1]=UCNV_EXT_FROM_U_SUBCHAR1; 1031ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData->stage3bTop=2; 1032ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1033ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* allocate the first entry in the fromUTable because index 0 means "no result" */ 1034ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utm_alloc(extData->fromUTableUChars); 1035ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utm_alloc(extData->fromUTableValues); 1036ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1037ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!generateFromUTrie(extData, table, fromUCount)) { 1038ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 1039ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1040ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1041ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1042ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * offset the stage 1 trie entries by stage1Top because they will 1043ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * be stored in a single array 1044ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1045ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stage1=extData->stage1; 1046ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stage1Top=extData->stage1Top; 1047ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<stage1Top; ++i) { 1048ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stage1[i]=(uint16_t)(stage1[i]+stage1Top); 1049ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1050ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
1051ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 1052ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1053ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1054ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* -------------------------------------------------------------------------- */ 1055ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1056ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool 1057ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCnvExtAddTable(NewConverter *cnvData, UCMTable *table, UConverterStaticData *staticData) { 1058ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CnvExtData *extData; 1059ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1060ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(table->unicodeMask&UCNV_HAS_SURROGATES) { 1061ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "error: contains mappings for surrogate code points\n"); 1062ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 1063ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1064ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1065ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru staticData->conversionType=UCNV_MBCS; 1066ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1067ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru extData=(CnvExtData *)cnvData; 1068ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1069ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1070ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * assume that the table is sorted 1071ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1072ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * call the functions in this order because 1073ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * makeToUTable() modifies the original 
reverseMap, 1074ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * makeFromUTable() writes a whole new mapping into reverseMap 1075ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1076ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 1077ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru makeToUTable(extData, table) && 1078ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru makeFromUTable(extData, table); 1079ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1080