1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 48393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius* Copyright (C) 2000-2013, International Business Machines 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 8103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius* file name: genmbcs.cpp 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* encoding: US-ASCII 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* tab size: 8 (not used) 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* indentation:4 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* created on: 2000jul06 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* created by: Markus W. Scherer 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h> 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h" 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unewdata.h" 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucnv_cnv.h" 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucnvmbcs.h" 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucm.h" 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "makeconv.h" 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "genmbcs.h" 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * TODO: Split this file into toUnicode, SBCSFromUnicode and MBCSFromUnicode files. 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Reduce tests for maxCharLength. 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 33c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustruct MBCSData { 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru NewConverter newConverter; 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCMFile *ucm; 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* toUnicode (state table in ucm->states) */ 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _MBCSToUFallback toUFallbacks[MBCS_MAX_FALLBACK_COUNT]; 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t countToUFallbacks; 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t *unicodeCodeUnits; 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fromUnicode */ 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t stage1[MBCS_STAGE_1_SIZE]; 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t stage2Single[MBCS_STAGE_2_SIZE]; /* stage 2 for single-byte codepages */ 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t stage2[MBCS_STAGE_2_SIZE]; /* stage 2 for MBCS */ 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *fromUBytes; 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t stage2Top, stage3Top; 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fromUTF8 */ 51c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint16_t stageUTF8[0x10000>>MBCS_UTF8_STAGE_SHIFT]; /* allow for utf8Max=0xffff */ 52c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 53c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 54c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Maximum UTF-8-friendly code point. 55c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 0 if !utf8Friendly, otherwise 0x01ff..0xffff in steps of 0x100. 56c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * If utf8Friendly, utf8Max is normally either MBCS_UTF8_MAX or 0xffff. 57c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 58c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint16_t utf8Max; 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool utf8Friendly; 61c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UBool omitFromU; 62c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru}; 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* prototypes */ 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruMBCSClose(NewConverter *cnvData); 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruMBCSStartMappings(MBCSData *mbcsData); 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruMBCSAddToUnicode(MBCSData *mbcsData, 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *bytes, int32_t length, 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c, 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int8_t flag); 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruMBCSIsValid(NewConverter *cnvData, 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *bytes, int32_t length); 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruMBCSSingleAddFromUnicode(MBCSData *mbcsData, 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *bytes, int32_t length, 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c, 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int8_t flag); 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruMBCSAddFromUnicode(MBCSData *mbcsData, 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *bytes, int32_t length, 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c, 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int8_t flag); 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruMBCSPostprocess(MBCSData *mbcsData, const UConverterStaticData *staticData); 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruMBCSAddTable(NewConverter *cnvData, UCMTable *table, UConverterStaticData *staticData); 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic uint32_t 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruMBCSWrite(NewConverter *cnvData, const UConverterStaticData *staticData, 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UNewDataMemory *pData, int32_t tableType); 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* helper ------------------------------------------------------------------- */ 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 105103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic inline char 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruhexDigit(uint8_t digit) { 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return digit<=9 ? (char)('0'+digit) : (char)('a'-10+digit); 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 110103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic inline char * 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruprintBytes(char *buffer, const uint8_t *bytes, int32_t length) { 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *s=buffer; 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(length>0) { 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *s++=hexDigit((uint8_t)(*bytes>>4)); 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *s++=hexDigit((uint8_t)(*bytes&0xf)); 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++bytes; 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --length; 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *s=0; 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return buffer; 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* implementation ----------------------------------------------------------- */ 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 126c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Querustatic MBCSData gDummy; 127c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 128c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruU_CFUNC const MBCSData * 129c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruMBCSGetDummy() { 130c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_memset(&gDummy, 0, sizeof(MBCSData)); 131c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 132c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 133c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Set "pessimistic" values which may sometimes move too many 134c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * mappings to the extension table (but never too few). 135c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * These values cause MBCSOkForBaseFromUnicode() to return FALSE for the 136c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * largest set of mappings. 137c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Assume maxCharLength>1. 138c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 139c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gDummy.utf8Friendly=TRUE; 140c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(SMALL) { 141c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gDummy.utf8Max=0xffff; 142c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gDummy.omitFromU=TRUE; 143c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 144c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru gDummy.utf8Max=MBCS_UTF8_MAX; 145c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 146c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return &gDummy; 147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 148c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruMBCSInit(MBCSData *mbcsData, UCMFile *ucm) { 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memset(mbcsData, 0, sizeof(MBCSData)); 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->ucm=ucm; /* aliased, not owned */ 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->newConverter.close=MBCSClose; 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->newConverter.isValid=MBCSIsValid; 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->newConverter.addTable=MBCSAddTable; 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->newConverter.write=MBCSWrite; 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNewConverter * 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruMBCSOpen(UCMFile *ucm) { 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru MBCSData *mbcsData=(MBCSData *)uprv_malloc(sizeof(MBCSData)); 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(mbcsData==NULL) { 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("out of memory\n"); 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exit(U_MEMORY_ALLOCATION_ERROR); 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru MBCSInit(mbcsData, ucm); 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return &mbcsData->newConverter; 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruMBCSDestruct(MBCSData *mbcsData) { 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(mbcsData->unicodeCodeUnits); 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(mbcsData->fromUBytes); 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruMBCSClose(NewConverter *cnvData) { 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru MBCSData *mbcsData=(MBCSData *)cnvData; 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(mbcsData!=NULL) { 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru MBCSDestruct(mbcsData); 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(mbcsData); 185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruMBCSStartMappings(MBCSData *mbcsData) { 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i, sum, maxCharLength, 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stage2NullLength, stage2AllocLength, 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stage3NullLength, stage3AllocLength; 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* toUnicode */ 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* allocate the code unit array and prefill it with "unassigned" values */ 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sum=mbcsData->ucm->states.countToUCodeUnits; 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(VERBOSE) { 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("the total number of offsets is 0x%lx=%ld\n", (long)sum, (long)sum); 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(sum>0) { 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->unicodeCodeUnits=(uint16_t *)uprv_malloc(sum*sizeof(uint16_t)); 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(mbcsData->unicodeCodeUnits==NULL) { 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "error: out of memory allocating %ld 16-bit code units\n", 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (long)sum); 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<sum; ++i) { 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->unicodeCodeUnits[i]=0xfffe; 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fromUnicode */ 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxCharLength=mbcsData->ucm->states.maxCharLength; 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* allocate the codepage mappings and preset the first 16 characters to 0 */ 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(maxCharLength==1) { 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* allocate 64k 16-bit results for single-byte codepages */ 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sum=0x20000; 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* allocate 1M * maxCharLength bytes for at most 1M mappings */ 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sum=0x100000*maxCharLength; 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->fromUBytes=(uint8_t *)uprv_malloc(sum); 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(mbcsData->fromUBytes==NULL) { 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "error: out of memory allocating %ld B for target mappings\n", (long)sum); 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memset(mbcsData->fromUBytes, 0, sum); 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * UTF-8-friendly fromUnicode tries: allocate multiple blocks at a time. 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * See ucnvmbcs.h for details. 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * There is code, for example in ucnv_MBCSGetUnicodeSetForUnicode(), which 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * assumes that the initial stage 2/3 blocks are the all-unassigned ones. 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Therefore, we refine the data structure while maintaining this placement 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * even though it would be convenient to allocate the ASCII block at the 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * beginning of stage 3, for example. 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * UTF-8-friendly fromUnicode tries work from sorted tables and are built 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * pre-compacted, overlapping adjacent stage 2/3 blocks. 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This is necessary because the block allocation and compaction changes 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * at SBCS_UTF8_MAX or MBCS_UTF8_MAX, and for MBCS tables the additional 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * stage table uses direct indexes into stage 3, without a multiplier and 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * thus with a smaller reach. 248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Non-UTF-8-friendly fromUnicode tries work from unsorted tables 250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (because implicit precision is used), and are compacted 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * in post-processing. 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Preallocation for UTF-8-friendly fromUnicode tries: 254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Stage 3: 256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 64-entry all-unassigned first block followed by ASCII (128 entries). 257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Stage 2: 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 64-entry all-unassigned first block followed by preallocated 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 64-block for ASCII. 261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Preallocate ASCII as a linear 128-entry stage 3 block. */ 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stage2NullLength=MBCS_STAGE_2_BLOCK_SIZE; 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stage2AllocLength=MBCS_STAGE_2_BLOCK_SIZE; 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stage3NullLength=MBCS_UTF8_STAGE_3_BLOCK_SIZE; 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stage3AllocLength=128; /* ASCII U+0000..U+007f */ 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Initialize stage 1 for the preallocated blocks. */ 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sum=stage2NullLength; 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<(stage2AllocLength>>MBCS_STAGE_2_BLOCK_SIZE_SHIFT); ++i) { 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage1[i]=sum; 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sum+=MBCS_STAGE_2_BLOCK_SIZE; 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage2Top=stage2NullLength+stage2AllocLength; /* ==sum */ 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Stage 2 indexes count 16-blocks in stage 3 as follows: 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * SBCS: directly, indexes increment by 16 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * MBCS: indexes need to be multiplied by 16*maxCharLength, indexes increment by 1 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * MBCS UTF-8: directly, indexes increment by 16 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(maxCharLength==1) { 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sum=stage3NullLength; 286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<(stage3AllocLength/MBCS_STAGE_3_BLOCK_SIZE); ++i) { 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage2Single[mbcsData->stage1[0]+i]=sum; 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sum+=MBCS_STAGE_3_BLOCK_SIZE; 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sum=stage3NullLength/MBCS_STAGE_3_GRANULARITY; 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<(stage3AllocLength/MBCS_STAGE_3_BLOCK_SIZE); ++i) { 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage2[mbcsData->stage1[0]+i]=sum; 294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sum+=MBCS_STAGE_3_BLOCK_SIZE/MBCS_STAGE_3_GRANULARITY; 295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sum=stage3NullLength; 299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<(stage3AllocLength/MBCS_UTF8_STAGE_3_BLOCK_SIZE); ++i) { 300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stageUTF8[i]=sum; 301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sum+=MBCS_UTF8_STAGE_3_BLOCK_SIZE; 302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Allocate a 64-entry all-unassigned first stage 3 block, 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * for UTF-8-friendly lookup with a trail byte, 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * plus 128 entries for ASCII. 308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage3Top=(stage3NullLength+stage3AllocLength)*maxCharLength; /* ==sum*maxCharLength */ 310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* return TRUE for success */ 315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool 316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerusetFallback(MBCSData *mbcsData, uint32_t offset, UChar32 c) { 317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i=ucm_findFallback(mbcsData->toUFallbacks, mbcsData->countToUFallbacks, offset); 318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(i>=0) { 319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* if there is already a fallback for this offset, then overwrite it */ 320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->toUFallbacks[i].codePoint=c; 321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* if there is no fallback for this offset, then add one */ 324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i=mbcsData->countToUFallbacks; 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(i>=MBCS_MAX_FALLBACK_COUNT) { 326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "error: too many toUnicode fallbacks, currently at: U+%x\n", (int)c); 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->toUFallbacks[i].offset=offset; 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->toUFallbacks[i].codePoint=c; 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->countToUFallbacks=i+1; 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* remove fallback if there is one with this offset; return the code point if there was such a fallback, otherwise -1 */ 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruremoveFallback(MBCSData *mbcsData, uint32_t offset) { 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i=ucm_findFallback(mbcsData->toUFallbacks, mbcsData->countToUFallbacks, offset); 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(i>=0) { 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _MBCSToUFallback *toUFallbacks; 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t limit, old; 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru toUFallbacks=mbcsData->toUFallbacks; 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit=mbcsData->countToUFallbacks; 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru old=(int32_t)toUFallbacks[i].codePoint; 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* copy the last fallback entry here to keep the list contiguous */ 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru toUFallbacks[i].offset=toUFallbacks[limit-1].offset; 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru toUFallbacks[i].codePoint=toUFallbacks[limit-1].codePoint; 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->countToUFallbacks=limit-1; 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return old; 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * isFallback is almost a boolean: 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1 (TRUE) this is a fallback mapping 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 0 (FALSE) this is a precise mapping 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * -1 the precision of this mapping is not specified 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruMBCSAddToUnicode(MBCSData *mbcsData, 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *bytes, int32_t length, 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c, 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int8_t flag) { 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char buffer[10]; 371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t offset=0; 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i=0, entry, old; 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t state=0; 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(mbcsData->ucm->states.countStates==0) { 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "error: there is no state information!\n"); 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* for SI/SO (like EBCDIC-stateful), double-byte sequences start in state 1 */ 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length==2 && mbcsData->ucm->states.outputType==MBCS_OUTPUT_2_SISO) { 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru state=1; 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Walk down the state table like in conversion, 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * much like getNextUChar(). 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * We assume that c<=0x10ffff. 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0;;) { 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru entry=mbcsData->ucm->states.stateTable[state][bytes[i++]]; 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(MBCS_ENTRY_IS_TRANSITION(entry)) { 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(i==length) { 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "error: byte sequence too short, ends in non-final state %hu: 0x%s (U+%x)\n", 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (short)state, printBytes(buffer, bytes, length), (int)c); 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry); 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry); 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(i<length) { 40254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius fprintf(stderr, "error: byte sequence too long by %d bytes, final state %u: 0x%s (U+%x)\n", 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int)(length-i), state, printBytes(buffer, bytes, length), (int)c); 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(MBCS_ENTRY_FINAL_ACTION(entry)) { 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case MBCS_STATE_ILLEGAL: 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "error: byte sequence ends in illegal state at U+%04x<->0x%s\n", 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int)c, printBytes(buffer, bytes, length)); 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case MBCS_STATE_CHANGE_ONLY: 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "error: byte sequence ends in state-change-only at U+%04x<->0x%s\n", 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int)c, printBytes(buffer, bytes, length)); 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case MBCS_STATE_UNASSIGNED: 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "error: byte sequence ends in unassigned state at U+%04x<->0x%s\n", 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int)c, printBytes(buffer, bytes, length)); 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case MBCS_STATE_FALLBACK_DIRECT_16: 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case MBCS_STATE_VALID_DIRECT_16: 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case MBCS_STATE_FALLBACK_DIRECT_20: 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case MBCS_STATE_VALID_DIRECT_20: 423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(MBCS_ENTRY_SET_STATE(entry, 0)!=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, 0xfffe)) { 424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* the "direct" action's value is not "valid-direct-16-unassigned" any more */ 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_DIRECT_16 || MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_FALLBACK_DIRECT_16) { 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru old=MBCS_ENTRY_FINAL_VALUE(entry); 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru old=0x10000+MBCS_ENTRY_FINAL_VALUE(entry); 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(flag>=0) { 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "error: duplicate codepage byte sequence at U+%04x<->0x%s see U+%04x\n", 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int)c, printBytes(buffer, bytes, length), (int)old); 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(VERBOSE) { 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "duplicate codepage byte sequence at U+%04x<->0x%s see U+%04x\n", 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int)c, printBytes(buffer, bytes, length), (int)old); 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Continue after the above warning 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * if the precision of the mapping is unspecified. 441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* reassign the correct action code */ 444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru entry=MBCS_ENTRY_FINAL_SET_ACTION(entry, (MBCS_STATE_VALID_DIRECT_16+(flag==3 ? 2 : 0)+(c>=0x10000 ? 1 : 0))); 445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* put the code point into bits 22..7 for BMP, c-0x10000 into 26..7 for others */ 447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c<=0xffff) { 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru entry=MBCS_ENTRY_FINAL_SET_VALUE(entry, c); 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru entry=MBCS_ENTRY_FINAL_SET_VALUE(entry, c-0x10000); 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->ucm->states.stateTable[state][bytes[i-1]]=entry; 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case MBCS_STATE_VALID_16: 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* bits 26..16 are not used, 0 */ 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* bits 15..7 contain the final offset delta to one 16-bit code unit */ 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* check that this byte sequence is still unassigned */ 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((old=mbcsData->unicodeCodeUnits[offset])!=0xfffe || (old=removeFallback(mbcsData, offset))!=-1) { 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(flag>=0) { 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "error: duplicate codepage byte sequence at U+%04x<->0x%s see U+%04x\n", 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int)c, printBytes(buffer, bytes, length), (int)old); 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(VERBOSE) { 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "duplicate codepage byte sequence at U+%04x<->0x%s see U+%04x\n", 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int)c, printBytes(buffer, bytes, length), (int)old); 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c>=0x10000) { 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "error: code point does not fit into valid-16-bit state at U+%04x<->0x%s\n", 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int)c, printBytes(buffer, bytes, length)); 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(flag>0) { 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* assign only if there is no precise mapping */ 476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(mbcsData->unicodeCodeUnits[offset]==0xfffe) { 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return setFallback(mbcsData, offset, c); 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->unicodeCodeUnits[offset]=(uint16_t)c; 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case MBCS_STATE_VALID_16_PAIR: 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* bits 26..16 are not used, 0 */ 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* bits 15..7 contain the final offset delta to two 16-bit code units */ 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* check that this byte sequence is still unassigned */ 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru old=mbcsData->unicodeCodeUnits[offset]; 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(old<0xfffe) { 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t real; 491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(old<0xd800) { 492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru real=old; 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(old<=0xdfff) { 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru real=0x10000+((old&0x3ff)<<10)+((mbcsData->unicodeCodeUnits[offset+1])&0x3ff); 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* old<=0xe001 */ { 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru real=mbcsData->unicodeCodeUnits[offset+1]; 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(flag>=0) { 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "error: duplicate codepage byte sequence at U+%04x<->0x%s see U+%04x\n", 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int)c, printBytes(buffer, bytes, length), (int)real); 501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(VERBOSE) { 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "duplicate codepage byte sequence at U+%04x<->0x%s see U+%04x\n", 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int)c, printBytes(buffer, bytes, length), (int)real); 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(flag>0) { 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* assign only if there is no precise mapping */ 509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(old<=0xdbff || old==0xe000) { 510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* do nothing */ 511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(c<=0xffff) { 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* set a BMP fallback code point as a pair with 0xe001 */ 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->unicodeCodeUnits[offset++]=0xe001; 514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->unicodeCodeUnits[offset]=(uint16_t)c; 515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* set a fallback surrogate pair with two second surrogates */ 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->unicodeCodeUnits[offset++]=(uint16_t)(0xdbc0+(c>>10)); 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->unicodeCodeUnits[offset]=(uint16_t)(0xdc00+(c&0x3ff)); 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c<0xd800) { 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* set a BMP code point */ 523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->unicodeCodeUnits[offset]=(uint16_t)c; 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(c<=0xffff) { 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* set a BMP code point above 0xd800 as a pair with 0xe000 */ 526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->unicodeCodeUnits[offset++]=0xe000; 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->unicodeCodeUnits[offset]=(uint16_t)c; 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* set a surrogate pair */ 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->unicodeCodeUnits[offset++]=(uint16_t)(0xd7c0+(c>>10)); 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->unicodeCodeUnits[offset]=(uint16_t)(0xdc00+(c&0x3ff)); 532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* reserved, must never occur */ 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "internal error: byte sequence reached reserved action code, entry 0x%02x: 0x%s (U+%x)\n", 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int)entry, printBytes(buffer, bytes, length), (int)c); 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* is this byte sequence valid? (this is almost the same as MBCSAddToUnicode()) */ 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool 549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruMBCSIsValid(NewConverter *cnvData, 550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *bytes, int32_t length) { 551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru MBCSData *mbcsData=(MBCSData *)cnvData; 552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (UBool)(1==ucm_countChars(&mbcsData->ucm->states, bytes, length)); 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruMBCSSingleAddFromUnicode(MBCSData *mbcsData, 558103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const uint8_t *bytes, int32_t /*length*/, 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c, 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int8_t flag) { 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t *stage3, *p; 562b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t idx; 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t old; 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t b; 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t blockSize, newTop, i, nextOffset, newBlock, min; 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* ignore |2 SUB mappings */ 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(flag==2) { 570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Walk down the triple-stage compact array ("trie") and 575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * allocate parts as necessary. 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Note that the first stage 2 and 3 blocks are reserved for all-unassigned mappings. 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * We assume that length<=maxCharLength and that c<=0x10ffff. 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stage3=(uint16_t *)mbcsData->fromUBytes; 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b=*bytes; 581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* inspect stage 1 */ 583b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru idx=c>>MBCS_STAGE_1_SHIFT; 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(mbcsData->utf8Friendly && c<=SBCS_UTF8_MAX) { 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nextOffset=(c>>MBCS_STAGE_2_SHIFT)&MBCS_STAGE_2_BLOCK_MASK&~(MBCS_UTF8_STAGE_3_BLOCKS-1); 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nextOffset=(c>>MBCS_STAGE_2_SHIFT)&MBCS_STAGE_2_BLOCK_MASK; 588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 589b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(mbcsData->stage1[idx]==MBCS_STAGE_2_ALL_UNASSIGNED_INDEX) { 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* allocate another block in stage 2 */ 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newBlock=mbcsData->stage2Top; 592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(mbcsData->utf8Friendly) { 593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru min=newBlock-nextOffset; /* minimum block start with overlap */ 594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(min<newBlock && mbcsData->stage2Single[newBlock-1]==0) { 595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --newBlock; 596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newTop=newBlock+MBCS_STAGE_2_BLOCK_SIZE; 599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(newTop>MBCS_MAX_STAGE_2_TOP) { 601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "error: too many stage 2 entries at U+%04x<->0x%02x\n", (int)c, b); 602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * each stage 2 block contains 64 16-bit words: 607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 6 code point bits 9..4 with 1 stage 3 index 608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 609b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru mbcsData->stage1[idx]=(uint16_t)newBlock; 610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage2Top=newTop; 611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* inspect stage 2 */ 614b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru idx=mbcsData->stage1[idx]+nextOffset; 615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(mbcsData->utf8Friendly && c<=SBCS_UTF8_MAX) { 616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* allocate 64-entry blocks for UTF-8-friendly lookup */ 617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru blockSize=MBCS_UTF8_STAGE_3_BLOCK_SIZE; 618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nextOffset=c&MBCS_UTF8_STAGE_3_BLOCK_MASK; 619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru blockSize=MBCS_STAGE_3_BLOCK_SIZE; 621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nextOffset=c&MBCS_STAGE_3_BLOCK_MASK; 622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 623b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(mbcsData->stage2Single[idx]==0) { 624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* allocate another block in stage 3 */ 625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newBlock=mbcsData->stage3Top; 626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(mbcsData->utf8Friendly) { 627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru min=newBlock-nextOffset; /* minimum block start with overlap */ 628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(min<newBlock && stage3[newBlock-1]==0) { 629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --newBlock; 630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newTop=newBlock+blockSize; 633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(newTop>MBCS_STAGE_3_SBCS_SIZE) { 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "error: too many code points at U+%04x<->0x%02x\n", (int)c, b); 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* each block has 16 uint16_t entries */ 639b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru i=idx; 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(newBlock<newTop) { 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage2Single[i++]=(uint16_t)newBlock; 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newBlock+=MBCS_STAGE_3_BLOCK_SIZE; 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage3Top=newTop; /* ==newBlock */ 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* write the codepage entry into stage 3 and get the previous entry */ 648b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru p=stage3+mbcsData->stage2Single[idx]+nextOffset; 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru old=*p; 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(flag<=0) { 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *p=(uint16_t)(0xf00|b); 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(IS_PRIVATE_USE(c)) { 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *p=(uint16_t)(0xc00|b); 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *p=(uint16_t)(0x800|b); 656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* check that this Unicode code point was still unassigned */ 659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(old>=0x100) { 660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(flag>=0) { 661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "error: duplicate Unicode code point at U+%04x<->0x%02x see 0x%02x\n", 662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int)c, b, old&0xff); 663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(VERBOSE) { 665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "duplicate Unicode code point at U+%04x<->0x%02x see 0x%02x\n", 666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int)c, b, old&0xff); 667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* continue after the above warning if the precision of the mapping is unspecified */ 669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruMBCSAddFromUnicode(MBCSData *mbcsData, 676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *bytes, int32_t length, 677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c, 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int8_t flag) { 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char buffer[10]; 680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *pb; 681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *stage3, *p; 682b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru uint32_t idx, b, old, stage3Index; 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t maxCharLength; 684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t blockSize, newTop, i, nextOffset, newBlock, min, overlap, maxOverlap; 686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxCharLength=mbcsData->ucm->states.maxCharLength; 688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( mbcsData->ucm->states.outputType==MBCS_OUTPUT_2_SISO && 69050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (!IGNORE_SISO_CHECK && (*bytes==0xe || *bytes==0xf)) 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "error: illegal mapping to SI or SO for SI/SO codepage: U+%04x<->0x%s\n", 693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int)c, printBytes(buffer, bytes, length)); 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(flag==1 && length==1 && *bytes==0) { 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "error: unable to encode a |1 fallback from U+%04x to 0x%02x\n", 699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int)c, *bytes); 700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Walk down the triple-stage compact array ("trie") and 705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * allocate parts as necessary. 706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Note that the first stage 2 and 3 blocks are reserved for 707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * all-unassigned mappings. 708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * We assume that length<=maxCharLength and that c<=0x10ffff. 709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stage3=mbcsData->fromUBytes; 711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* inspect stage 1 */ 713b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru idx=c>>MBCS_STAGE_1_SHIFT; 714c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(mbcsData->utf8Friendly && c<=mbcsData->utf8Max) { 715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nextOffset=(c>>MBCS_STAGE_2_SHIFT)&MBCS_STAGE_2_BLOCK_MASK&~(MBCS_UTF8_STAGE_3_BLOCKS-1); 716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nextOffset=(c>>MBCS_STAGE_2_SHIFT)&MBCS_STAGE_2_BLOCK_MASK; 718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 719b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(mbcsData->stage1[idx]==MBCS_STAGE_2_ALL_UNASSIGNED_INDEX) { 720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* allocate another block in stage 2 */ 721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newBlock=mbcsData->stage2Top; 722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(mbcsData->utf8Friendly) { 723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru min=newBlock-nextOffset; /* minimum block start with overlap */ 724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(min<newBlock && mbcsData->stage2[newBlock-1]==0) { 725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --newBlock; 726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newTop=newBlock+MBCS_STAGE_2_BLOCK_SIZE; 729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(newTop>MBCS_MAX_STAGE_2_TOP) { 731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "error: too many stage 2 entries at U+%04x<->0x%s\n", 732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int)c, printBytes(buffer, bytes, length)); 733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * each stage 2 block contains 64 32-bit words: 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 6 code point bits 9..4 with value with bits 31..16 "assigned" flags and bits 15..0 stage 3 index 739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 740b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru i=idx; 741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(newBlock<newTop) { 742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage1[i++]=(uint16_t)newBlock; 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newBlock+=MBCS_STAGE_2_BLOCK_SIZE; 744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage2Top=newTop; /* ==newBlock */ 746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* inspect stage 2 */ 749b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru idx=mbcsData->stage1[idx]+nextOffset; 750c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(mbcsData->utf8Friendly && c<=mbcsData->utf8Max) { 751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* allocate 64-entry blocks for UTF-8-friendly lookup */ 752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru blockSize=MBCS_UTF8_STAGE_3_BLOCK_SIZE*maxCharLength; 753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nextOffset=c&MBCS_UTF8_STAGE_3_BLOCK_MASK; 754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru blockSize=MBCS_STAGE_3_BLOCK_SIZE*maxCharLength; 756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nextOffset=c&MBCS_STAGE_3_BLOCK_MASK; 757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 758b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(mbcsData->stage2[idx]==0) { 759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* allocate another block in stage 3 */ 760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newBlock=mbcsData->stage3Top; 761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(mbcsData->utf8Friendly && nextOffset>=MBCS_STAGE_3_GRANULARITY) { 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Overlap stage 3 blocks only in multiples of 16-entry blocks 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * because of the indexing granularity in stage 2. 765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxOverlap=(nextOffset&~(MBCS_STAGE_3_GRANULARITY-1))*maxCharLength; 767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(overlap=0; 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru overlap<maxOverlap && stage3[newBlock-overlap-1]==0; 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++overlap) {} 770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru overlap=(overlap/MBCS_STAGE_3_GRANULARITY)/maxCharLength; 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru overlap=(overlap*MBCS_STAGE_3_GRANULARITY)*maxCharLength; 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newBlock-=overlap; 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newTop=newBlock+blockSize; 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(newTop>MBCS_STAGE_3_MBCS_SIZE*(uint32_t)maxCharLength) { 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "error: too many code points at U+%04x<->0x%s\n", 780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int)c, printBytes(buffer, bytes, length)); 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* each block has 16*maxCharLength bytes */ 784b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru i=idx; 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(newBlock<newTop) { 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage2[i++]=(newBlock/MBCS_STAGE_3_GRANULARITY)/maxCharLength; 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newBlock+=MBCS_STAGE_3_BLOCK_SIZE*maxCharLength; 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage3Top=newTop; /* ==newBlock */ 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 792b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru stage3Index=MBCS_STAGE_3_GRANULARITY*(uint32_t)(uint16_t)mbcsData->stage2[idx]; 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Build an alternate, UTF-8-friendly stage table as well. */ 795c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(mbcsData->utf8Friendly && c<=mbcsData->utf8Max) { 796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Overflow for uint16_t entries in stageUTF8? */ 797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(stage3Index>0xffff) { 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This can occur only if the mapping table is nearly perfectly filled and if 800c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * utf8Max==0xffff. 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (There is no known charset like this. GB 18030 does not map 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * surrogate code points and LMBCS does not map 256 PUA code points.) 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Otherwise, stage3Index<=MBCS_UTF8_LIMIT<0xffff 805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (stage3Index can at most reach exactly MBCS_UTF8_LIMIT) 806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * because we have a sorted table and there are at most MBCS_UTF8_LIMIT 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * mappings with 0<=c<MBCS_UTF8_LIMIT, and there is only also 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the initial all-unassigned block in stage3. 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 810c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Solution for the overflow: Reduce utf8Max to the next lower value, 0xfeff. 811c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (See svn revision 20866 of the markus/ucnvutf8 feature branch for 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * code that causes MBCSAddTable() to rebuild the table not utf8Friendly 814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * in case of overflow. That code was not tested.) 815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 816c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru mbcsData->utf8Max=0xfeff; 817c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 818c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 819c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * The stage 3 block has been assigned for the regular trie. 820c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Just copy its index into stageUTF8[], without the granularity. 821c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 822c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru mbcsData->stageUTF8[c>>MBCS_UTF8_STAGE_SHIFT]=(uint16_t)stage3Index; 823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* write the codepage bytes into stage 3 and get the previous bytes */ 827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* assemble the bytes into a single integer */ 829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pb=bytes; 830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b=0; 831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(length) { 832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 4: 833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b=*pb++; 834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 3: 835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b=(b<<8)|*pb++; 836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 2: 837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b=(b<<8)|*pb++; 838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 1: 839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b=(b<<8)|*pb++; 841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru old=0; 845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru p=stage3+(stage3Index+nextOffset)*maxCharLength; 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(maxCharLength) { 847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 2: 848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru old=*(uint16_t *)p; 849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(uint16_t *)p=(uint16_t)b; 850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 3: 852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru old=(uint32_t)*p<<16; 853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *p++=(uint8_t)(b>>16); 854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru old|=(uint32_t)*p<<8; 855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *p++=(uint8_t)(b>>8); 856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru old|=*p; 857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *p=(uint8_t)b; 858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 4: 860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru old=*(uint32_t *)p; 861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(uint32_t *)p=b; 862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* will never occur */ 865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* check that this Unicode code point was still unassigned */ 869b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if((mbcsData->stage2[idx+(nextOffset>>MBCS_STAGE_2_SHIFT)]&(1UL<<(16+(c&0xf))))!=0 || old!=0) { 870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(flag>=0) { 871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "error: duplicate Unicode code point at U+%04x<->0x%s see 0x%02x\n", 872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int)c, printBytes(buffer, bytes, length), (int)old); 873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(VERBOSE) { 875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "duplicate Unicode code point at U+%04x<->0x%s see 0x%02x\n", 876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int)c, printBytes(buffer, bytes, length), (int)old); 877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* continue after the above warning if the precision of the mapping is 879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru unspecified */ 880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(flag<=0) { 882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* set the roundtrip flag */ 883b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru mbcsData->stage2[idx+(nextOffset>>4)]|=(1UL<<(16+(c&0xf))); 884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC UBool 890c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste QueruMBCSOkForBaseFromUnicode(const MBCSData *mbcsData, 891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *bytes, int32_t length, 892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c, int8_t flag) { 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * A 1:1 mapping does not fit into the MBCS base table's fromUnicode table under 895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the following conditions: 896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - a |2 SUB mapping for <subchar1> (no base table data structure for them) 898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - a |1 fallback to 0x00 (result value 0, indistinguishable from unmappable entry) 899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - a multi-byte mapping with leading 0x00 bytes (no explicit length field) 900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Some of these tests are redundant with ucm_mappingType(). 902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( (flag==2 && length==1) || 904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (flag==1 && bytes[0]==0) || /* testing length==1 would be redundant with the next test */ 905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (flag<=1 && length>1 && bytes[0]==0) 906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Additional restrictions for UTF-8-friendly fromUnicode tables, 912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * for code points up to the maximum optimized one: 913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - any mapping to 0x00 (result value 0, indistinguishable from unmappable entry) 915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - any |1 fallback (no roundtrip flags in the optimized table) 916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 917c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(mbcsData->utf8Friendly && flag<=1 && c<=mbcsData->utf8Max && (bytes[0]==0 || flag==1)) { 918c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return FALSE; 919c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 920c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 921c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 922c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * If we omit the fromUnicode data, we can only store roundtrips there 923c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * because only they are recoverable from the toUnicode data. 924c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Fallbacks must go into the extension table. 925c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 926c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(mbcsData->omitFromU && flag!=0) { 927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* All other mappings do fit into the base table. */ 931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* we can assume that the table only contains 1:1 mappings with <=4 bytes each */ 935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool 936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruMBCSAddTable(NewConverter *cnvData, UCMTable *table, UConverterStaticData *staticData) { 937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru MBCSData *mbcsData; 938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCMapping *m; 939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i, maxCharLength; 941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int8_t f; 942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isOK, utf8Friendly; 943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru staticData->unicodeMask=table->unicodeMask; 945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(staticData->unicodeMask==3) { 946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "error: contains mappings for both supplementary and surrogate code points\n"); 947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru staticData->conversionType=UCNV_MBCS; 951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData=(MBCSData *)cnvData; 953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxCharLength=mbcsData->ucm->states.maxCharLength; 954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Generation of UTF-8-friendly data requires 957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a sorted table, which makeconv generates when explicit precision 958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * indicators are used. 959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->utf8Friendly=utf8Friendly=(UBool)((table->flagsType&UCM_FLAGS_EXPLICIT)!=0); 961c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(utf8Friendly) { 962c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru mbcsData->utf8Max=MBCS_UTF8_MAX; 963c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(SMALL && maxCharLength>1) { 964c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru mbcsData->omitFromU=TRUE; 965c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 966c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 967c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru mbcsData->utf8Max=0; 968c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(SMALL && maxCharLength>1) { 969c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru fprintf(stderr, 970c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru "makeconv warning: --small not available for .ucm files without |0 etc.\n"); 971c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 972c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!MBCSStartMappings(mbcsData)) { 975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru staticData->hasFromUnicodeFallback=FALSE; 979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru staticData->hasToUnicodeFallback=FALSE; 980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isOK=TRUE; 982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m=table->mappings; 984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<table->mappingsLength; ++m, ++i) { 985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=m->u; 986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru f=m->f; 987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 988c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 989c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Small optimization for --small .cnv files: 990c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 991c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * If there are fromUnicode mappings above MBCS_UTF8_MAX, 992c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * then the file size will be smaller if we make utf8Max larger 993c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * because the size increase in stageUTF8 will be more than balanced by 994c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * how much less of stage2 needs to be stored. 995c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 996c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * There is no point in doing this incrementally because stageUTF8 997c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * uses so much less space per block than stage2, 998c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * so we immediately increase utf8Max to 0xffff. 999c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * 1000c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Do not increase utf8Max if it is already at 0xfeff because MBCSAddFromUnicode() 1001c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * sets it to that value when stageUTF8 overflows. 1002c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 1003c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if( mbcsData->omitFromU && f<=1 && 1004c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru mbcsData->utf8Max<c && c<=0xffff && 1005c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru mbcsData->utf8Max<0xfeff 1006c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ) { 1007c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru mbcsData->utf8Max=0xffff; 1008c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1009c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(f) { 1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case -1: 1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* there was no precision/fallback indicator */ 1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fall through to set the mappings */ 1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0: 1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* set roundtrip mappings */ 1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isOK&=MBCSAddToUnicode(mbcsData, m->b.bytes, m->bLen, c, f); 1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(maxCharLength==1) { 1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isOK&=MBCSSingleAddFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f); 1020c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f)) { 1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isOK&=MBCSAddFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f); 1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m->f|=MBCS_FROM_U_EXT_FLAG; 1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m->moveFlag=UCM_MOVE_TO_EXT; 1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 1: 1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* set only a fallback mapping from Unicode to codepage */ 1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(maxCharLength==1) { 1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru staticData->hasFromUnicodeFallback=TRUE; 1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isOK&=MBCSSingleAddFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f); 1032c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else if(MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f)) { 1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru staticData->hasFromUnicodeFallback=TRUE; 1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isOK&=MBCSAddFromUnicode(mbcsData, m->b.bytes, m->bLen, c, f); 1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m->f|=MBCS_FROM_U_EXT_FLAG; 1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m->moveFlag=UCM_MOVE_TO_EXT; 1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 2: 1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* ignore |2 SUB mappings, except to move <subchar1> mappings to the extension table */ 1042c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(maxCharLength>1 && m->bLen==1) { 1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m->f|=MBCS_FROM_U_EXT_FLAG; 1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru m->moveFlag=UCM_MOVE_TO_EXT; 1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 3: 1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* set only a fallback mapping from codepage to Unicode */ 1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru staticData->hasToUnicodeFallback=TRUE; 1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isOK&=MBCSAddToUnicode(mbcsData, m->b.bytes, m->bLen, c, f); 1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 10528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius case 4: 10538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius /* move "good one-way" mappings to the extension table */ 10548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius m->f|=MBCS_FROM_U_EXT_FLAG; 10558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius m->moveFlag=UCM_MOVE_TO_EXT; 10568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius break; 1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* will not occur because the parser checked it already */ 1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "error: illegal fallback indicator %d\n", f); 1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru MBCSPostprocess(mbcsData, staticData); 1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return isOK; 1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UBool 1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerutransformEUC(MBCSData *mbcsData) { 1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *p8; 107254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius uint32_t i, value, oldLength, old3Top; 1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t b; 1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru oldLength=mbcsData->ucm->states.maxCharLength; 1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(oldLength<3) { 1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru old3Top=mbcsData->stage3Top; 1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* careful: 2-byte and 4-byte codes are stored in platform endianness! */ 1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test if all first bytes are in {0, 0x8e, 0x8f} */ 1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru p8=mbcsData->fromUBytes; 1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !U_IS_BIG_ENDIAN 1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(oldLength==4) { 1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru p8+=3; 1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<old3Top; i+=oldLength) { 1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b=p8[i]; 1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(b!=0 && b!=0x8e && b!=0x8f) { 1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* some first byte does not fit the EUC pattern, nothing to be done */ 1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 1098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* restore p if it was modified above */ 1101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru p8=mbcsData->fromUBytes; 1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* modify outputType and adjust stage3Top */ 1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->ucm->states.outputType=(int8_t)(MBCS_OUTPUT_3_EUC+oldLength-3); 110554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius mbcsData->stage3Top=(old3Top*(oldLength-1))/oldLength; 1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * EUC-encode all byte sequences; 1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * see "CJKV Information Processing" (1st ed. 1999) from Ken Lunde, O'Reilly, 1110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * p. 161 in chapter 4 "Encoding Methods" 1111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This also must reverse the byte order if the platform is little-endian! 1113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(oldLength==3) { 1115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t *q=(uint16_t *)p8; 1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<old3Top; i+=oldLength) { 1117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru b=*p8; 1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(b==0) { 1119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* short sequences are stored directly */ 1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* code set 0 or 1 */ 1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (*q++)=(uint16_t)((p8[1]<<8)|p8[2]); 1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(b==0x8e) { 1123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* code set 2 */ 1124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (*q++)=(uint16_t)(((p8[1]&0x7f)<<8)|p8[2]); 1125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* b==0x8f */ { 1126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* code set 3 */ 1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (*q++)=(uint16_t)((p8[1]<<8)|(p8[2]&0x7f)); 1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru p8+=3; 1130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* oldLength==4 */ { 1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *q=p8; 1133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t *p32=(uint32_t *)p8; 1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<old3Top; i+=4) { 1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru value=(*p32++); 1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(value<=0xffffff) { 1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* short sequences are stored directly */ 1138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* code set 0 or 1 */ 1139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (*q++)=(uint8_t)(value>>16); 1140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (*q++)=(uint8_t)(value>>8); 1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (*q++)=(uint8_t)value; 1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(value<=0x8effffff) { 1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* code set 2 */ 1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (*q++)=(uint8_t)((value>>16)&0x7f); 1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (*q++)=(uint8_t)(value>>8); 1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (*q++)=(uint8_t)value; 1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* first byte is 0x8f */ { 1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* code set 3 */ 1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (*q++)=(uint8_t)(value>>16); 1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (*q++)=(uint8_t)((value>>8)&0x7f); 1151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (*q++)=(uint8_t)value; 1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 1160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Compact stage 2 for SBCS by overlapping adjacent stage 2 blocks as far 1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * as possible. Overlapping is done on unassigned head and tail 1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * parts of blocks in steps of MBCS_STAGE_2_MULTIPLIER. 1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Stage 1 indexes need to be adjusted accordingly. 1164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This function is very similar to genprops/store.c/compactStage(). 1165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void 1167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerusingleCompactStage2(MBCSData *mbcsData) { 1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* this array maps the ordinal number of a stage 2 block to its new stage 1 index */ 1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t map[MBCS_STAGE_2_MAX_BLOCKS]; 1170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t i, start, prevEnd, newStart; 1171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* enter the all-unassigned first stage 2 block into the map */ 1173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru map[0]=MBCS_STAGE_2_ALL_UNASSIGNED_INDEX; 1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* begin with the first block after the all-unassigned one */ 1176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start=newStart=MBCS_STAGE_2_FIRST_ASSIGNED; 1177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(start<mbcsData->stage2Top) { 1178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prevEnd=(uint16_t)(newStart-1); 1179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* find the size of the overlap */ 1181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<MBCS_STAGE_2_BLOCK_SIZE && mbcsData->stage2Single[start+i]==0 && mbcsData->stage2Single[prevEnd-i]==0; ++i) {} 1182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(i>0) { 1184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru map[start>>MBCS_STAGE_2_BLOCK_SIZE_SHIFT]=(uint16_t)(newStart-i); 1185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* move the non-overlapping indexes to their new positions */ 1187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start+=i; 1188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=(uint16_t)(MBCS_STAGE_2_BLOCK_SIZE-i); i>0; --i) { 1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage2Single[newStart++]=mbcsData->stage2Single[start++]; 1190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(newStart<start) { 1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* move the indexes to their new positions */ 1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru map[start>>MBCS_STAGE_2_BLOCK_SIZE_SHIFT]=newStart; 1194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=MBCS_STAGE_2_BLOCK_SIZE; i>0; --i) { 1195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage2Single[newStart++]=mbcsData->stage2Single[start++]; 1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* no overlap && newStart==start */ { 1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru map[start>>MBCS_STAGE_2_BLOCK_SIZE_SHIFT]=start; 1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start=newStart+=MBCS_STAGE_2_BLOCK_SIZE; 1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* adjust stage2Top */ 1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(VERBOSE && newStart<mbcsData->stage2Top) { 1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("compacting stage 2 from stage2Top=0x%lx to 0x%lx, saving %ld bytes\n", 1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (unsigned long)mbcsData->stage2Top, (unsigned long)newStart, 1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (long)(mbcsData->stage2Top-newStart)*2); 1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage2Top=newStart; 1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* now adjust stage 1 */ 1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<MBCS_STAGE_1_SIZE; ++i) { 1213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage1[i]=map[mbcsData->stage1[i]>>MBCS_STAGE_2_BLOCK_SIZE_SHIFT]; 1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Compact stage 3 for SBCS - same algorithm as above. */ 1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void 1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerusingleCompactStage3(MBCSData *mbcsData) { 1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t *stage3=(uint16_t *)mbcsData->fromUBytes; 1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* this array maps the ordinal number of a stage 3 block to its new stage 2 index */ 1223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t map[0x1000]; 1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t i, start, prevEnd, newStart; 1225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* enter the all-unassigned first stage 3 block into the map */ 1227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru map[0]=0; 1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* begin with the first block after the all-unassigned one */ 1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start=newStart=16; 1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(start<mbcsData->stage3Top) { 1232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prevEnd=(uint16_t)(newStart-1); 1233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* find the size of the overlap */ 1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<16 && stage3[start+i]==0 && stage3[prevEnd-i]==0; ++i) {} 1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(i>0) { 1238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru map[start>>4]=(uint16_t)(newStart-i); 1239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* move the non-overlapping indexes to their new positions */ 1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start+=i; 1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=(uint16_t)(16-i); i>0; --i) { 1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stage3[newStart++]=stage3[start++]; 1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(newStart<start) { 1246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* move the indexes to their new positions */ 1247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru map[start>>4]=newStart; 1248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=16; i>0; --i) { 1249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stage3[newStart++]=stage3[start++]; 1250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* no overlap && newStart==start */ { 1252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru map[start>>4]=start; 1253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start=newStart+=16; 1254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* adjust stage3Top */ 1258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(VERBOSE && newStart<mbcsData->stage3Top) { 1259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("compacting stage 3 from stage3Top=0x%lx to 0x%lx, saving %ld bytes\n", 1260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (unsigned long)mbcsData->stage3Top, (unsigned long)newStart, 1261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (long)(mbcsData->stage3Top-newStart)*2); 1262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage3Top=newStart; 1264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* now adjust stage 2 */ 1266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<mbcsData->stage2Top; ++i) { 1267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage2Single[i]=map[mbcsData->stage2Single[i]>>4]; 1268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 1272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Compact stage 2 by overlapping adjacent stage 2 blocks as far 1273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * as possible. Overlapping is done on unassigned head and tail 1274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * parts of blocks in steps of MBCS_STAGE_2_MULTIPLIER. 1275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Stage 1 indexes need to be adjusted accordingly. 1276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This function is very similar to genprops/store.c/compactStage(). 1277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void 1279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerucompactStage2(MBCSData *mbcsData) { 1280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* this array maps the ordinal number of a stage 2 block to its new stage 1 index */ 1281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t map[MBCS_STAGE_2_MAX_BLOCKS]; 1282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t i, start, prevEnd, newStart; 1283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* enter the all-unassigned first stage 2 block into the map */ 1285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru map[0]=MBCS_STAGE_2_ALL_UNASSIGNED_INDEX; 1286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* begin with the first block after the all-unassigned one */ 1288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start=newStart=MBCS_STAGE_2_FIRST_ASSIGNED; 1289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(start<mbcsData->stage2Top) { 1290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prevEnd=(uint16_t)(newStart-1); 1291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* find the size of the overlap */ 1293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<MBCS_STAGE_2_BLOCK_SIZE && mbcsData->stage2[start+i]==0 && mbcsData->stage2[prevEnd-i]==0; ++i) {} 1294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(i>0) { 1296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru map[start>>MBCS_STAGE_2_BLOCK_SIZE_SHIFT]=(uint16_t)(newStart-i); 1297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* move the non-overlapping indexes to their new positions */ 1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start+=i; 1300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=(uint16_t)(MBCS_STAGE_2_BLOCK_SIZE-i); i>0; --i) { 1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage2[newStart++]=mbcsData->stage2[start++]; 1302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(newStart<start) { 1304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* move the indexes to their new positions */ 1305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru map[start>>MBCS_STAGE_2_BLOCK_SIZE_SHIFT]=newStart; 1306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=MBCS_STAGE_2_BLOCK_SIZE; i>0; --i) { 1307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage2[newStart++]=mbcsData->stage2[start++]; 1308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* no overlap && newStart==start */ { 1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru map[start>>MBCS_STAGE_2_BLOCK_SIZE_SHIFT]=start; 1311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start=newStart+=MBCS_STAGE_2_BLOCK_SIZE; 1312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* adjust stage2Top */ 1316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(VERBOSE && newStart<mbcsData->stage2Top) { 1317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("compacting stage 2 from stage2Top=0x%lx to 0x%lx, saving %ld bytes\n", 1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (unsigned long)mbcsData->stage2Top, (unsigned long)newStart, 1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (long)(mbcsData->stage2Top-newStart)*4); 1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage2Top=newStart; 1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* now adjust stage 1 */ 1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<MBCS_STAGE_1_SIZE; ++i) { 1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage1[i]=map[mbcsData->stage1[i]>>MBCS_STAGE_2_BLOCK_SIZE_SHIFT]; 1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void 1330103e9ffba2cba345d0078eb8b8db33249f81840aCraig CorneliusMBCSPostprocess(MBCSData *mbcsData, const UConverterStaticData * /*staticData*/) { 1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCMStates *states; 1332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t maxCharLength, stage3Width; 1333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru states=&mbcsData->ucm->states; 1335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stage3Width=maxCharLength=states->maxCharLength; 1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucm_optimizeStates(states, 1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &mbcsData->unicodeCodeUnits, 1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->toUFallbacks, mbcsData->countToUFallbacks, 1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru VERBOSE); 1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* try to compact the fromUnicode tables */ 1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(transformEUC(mbcsData)) { 1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --stage3Width; 1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 1348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * UTF-8-friendly tries are built precompacted, to cope with variable 1349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * stage 3 allocation block sizes. 1350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Tables without precision indicators cannot be built that way, 1352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * because if a block was overlapped with a previous one, then a smaller 1353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * code point for the same block would not fit. 1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Therefore, such tables are not marked UTF-8-friendly and must be 1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * compacted after all mappings are entered. 1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!mbcsData->utf8Friendly) { 1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(maxCharLength==1) { 1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru singleCompactStage3(mbcsData); 1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru singleCompactStage2(mbcsData); 1361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru compactStage2(mbcsData); 1363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(VERBOSE) { 1367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*uint32_t c, i1, i2, i2Limit, i3;*/ 1368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("fromUnicode number of uint%s_t in stage 2: 0x%lx=%lu\n", 1370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxCharLength==1 ? "16" : "32", 1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (unsigned long)mbcsData->stage2Top, 1372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (unsigned long)mbcsData->stage2Top); 1373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("fromUnicode number of %d-byte stage 3 mapping entries: 0x%lx=%lu\n", 1374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int)stage3Width, 1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (unsigned long)mbcsData->stage3Top/stage3Width, 1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (unsigned long)mbcsData->stage3Top/stage3Width); 1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if 0 1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=0; 1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i1=0; i1<MBCS_STAGE_1_SIZE; ++i1) { 1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i2=mbcsData->stage1[i1]; 1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(i2==0) { 1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c+=MBCS_STAGE_2_BLOCK_SIZE*MBCS_STAGE_3_BLOCK_SIZE; 1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i2Limit=i2+MBCS_STAGE_2_BLOCK_SIZE; i2<i2Limit; ++i2) { 1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(maxCharLength==1) { 1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i3=mbcsData->stage2Single[i2]; 1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i3=(uint16_t)mbcsData->stage2[i2]; 1390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(i3==0) { 1392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c+=MBCS_STAGE_3_BLOCK_SIZE; 1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 1394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("U+%04lx i1=0x%02lx i2=0x%04lx i3=0x%04lx\n", 1396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (unsigned long)c, 1397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (unsigned long)i1, 1398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (unsigned long)i2, 1399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (unsigned long)i3); 1400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c+=MBCS_STAGE_3_BLOCK_SIZE; 1401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic uint32_t 1408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruMBCSWrite(NewConverter *cnvData, const UConverterStaticData *staticData, 1409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UNewDataMemory *pData, int32_t tableType) { 1410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru MBCSData *mbcsData=(MBCSData *)cnvData; 1411c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t stage2Start, stage2Length; 1412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t top, stageUTF8Length=0; 1413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i, stage1Top; 1414c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t headerLength; 1415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1416103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius _MBCSHeader header=UCNV_MBCS_HEADER_INITIALIZER; 1417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1418c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru stage2Length=mbcsData->stage2Top; 1419c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(mbcsData->omitFromU) { 1420c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* find how much of stage2 can be omitted */ 1421c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t utf8Limit=(int32_t)mbcsData->utf8Max+1; 1422c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uint32_t st2=0; /*initialized it to avoid compiler warnings */ 1423c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1424c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru i=utf8Limit>>MBCS_STAGE_1_SHIFT; 1425c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if((utf8Limit&((1<<MBCS_STAGE_1_SHIFT)-1))!=0 && (st2=mbcsData->stage1[i])!=0) { 1426c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* utf8Limit is in the middle of an existing stage 2 block */ 1427c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru stage2Start=st2+((utf8Limit>>MBCS_STAGE_2_SHIFT)&MBCS_STAGE_2_BLOCK_MASK); 1428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1429c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* find the last stage2 block with mappings before utf8Limit */ 1430c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru while(i>0 && (st2=mbcsData->stage1[--i])==0) {} 1431c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* stage2 up to the end of this block corresponds to stageUTF8 */ 1432c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru stage2Start=st2+MBCS_STAGE_2_BLOCK_SIZE; 1433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1434c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru header.options|=MBCS_OPT_NO_FROM_U; 1435c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru header.fullStage2Length=stage2Length; 1436c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru stage2Length-=stage2Start; 1437c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(VERBOSE) { 1438c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("+ omitting %lu out of %lu stage2 entries and %lu fromUBytes\n", 1439c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (unsigned long)stage2Start, 1440c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (unsigned long)mbcsData->stage2Top, 1441c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru (unsigned long)mbcsData->stage3Top); 1442c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru printf("+ total size savings: %lu bytes\n", (unsigned long)stage2Start*4+mbcsData->stage3Top); 1443c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1444c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 1445c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru stage2Start=0; 1446c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1447c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1448c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(staticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) { 1449c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru stage1Top=MBCS_STAGE_1_SIZE; /* 0x440==1088 */ 1450c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 1451c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru stage1Top=0x40; /* 0x40==64 */ 1452c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1453c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1454c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* adjust stage 1 entries to include the size of stage 1 in the offsets to stage 2 */ 1455c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(mbcsData->ucm->states.maxCharLength==1) { 1456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<stage1Top; ++i) { 1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage1[i]+=(uint16_t)stage1Top; 1458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1460c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* stage2Top/Length have counted 16-bit results, now we need to count bytes */ 1461c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* also round up to a multiple of 4 bytes */ 1462c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru stage2Length=(stage2Length*2+1)&~1; 1463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* stage3Top has counted 16-bit results, now we need to count bytes */ 1465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage3Top*=2; 1466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(mbcsData->utf8Friendly) { 1468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru header.version[2]=(uint8_t)(SBCS_UTF8_MAX>>8); /* store 0x1f for max==0x1fff */ 1469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<stage1Top; ++i) { 1472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage1[i]+=(uint16_t)stage1Top/2; /* stage 2 contains 32-bit entries, stage 1 16-bit entries */ 1473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1475c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* stage2Top/Length have counted 32-bit results, now we need to count bytes */ 1476c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru stage2Length*=4; 1477c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* leave stage2Start counting 32-bit units */ 1478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(mbcsData->utf8Friendly) { 1480c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru stageUTF8Length=(mbcsData->utf8Max+1)>>MBCS_UTF8_STAGE_SHIFT; 1481c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru header.version[2]=(uint8_t)(mbcsData->utf8Max>>8); /* store 0xd7 for max==0xd7ff */ 1482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* stage3Top has already counted bytes */ 1485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1487c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* round up stage3Top so that the sizes of all data blocks are multiples of 4 */ 1488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->stage3Top=(mbcsData->stage3Top+3)&~3; 1489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fill the header */ 1491c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(header.options&MBCS_OPT_INCOMPATIBLE_MASK) { 1492c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru header.version[0]=5; 1493c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(header.options&MBCS_OPT_NO_FROM_U) { 1494c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru headerLength=10; /* include fullStage2Length */ 1495c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 1496c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru headerLength=MBCS_HEADER_V5_MIN_LENGTH; /* 9 */ 1497c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1498c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 1499c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru header.version[0]=4; 1500c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru headerLength=MBCS_HEADER_V4_LENGTH; /* 8 */ 1501c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 15028393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius header.version[1]=4; 1503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* header.version[2] set above for utf8Friendly data */ 1504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1505c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru header.options|=(uint32_t)headerLength; 1506c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 1507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru header.countStates=mbcsData->ucm->states.countStates; 1508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru header.countToUFallbacks=mbcsData->countToUFallbacks; 1509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru header.offsetToUCodeUnits= 1511c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru headerLength*4+ 1512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->ucm->states.countStates*1024+ 1513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->countToUFallbacks*sizeof(_MBCSToUFallback); 1514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru header.offsetFromUTable= 1515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru header.offsetToUCodeUnits+ 1516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru mbcsData->ucm->states.countToUCodeUnits*2; 1517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru header.offsetFromUBytes= 1518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru header.offsetFromUTable+ 1519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stage1Top*2+ 1520c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru stage2Length; 1521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru header.fromUBytesLength=mbcsData->stage3Top; 1522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1523c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru top=header.offsetFromUBytes+stageUTF8Length*2; 1524c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!(header.options&MBCS_OPT_NO_FROM_U)) { 1525c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru top+=header.fromUBytesLength; 1526c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru header.flags=(uint8_t)(mbcsData->ucm->states.outputType); 1529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(tableType&TABLE_EXT) { 1531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(top>0xffffff) { 1532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "error: offset 0x%lx to extension table exceeds 0xffffff\n", (long)top); 1533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru header.flags|=top<<8; 1537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* write the MBCS data */ 1540c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru udata_writeBlock(pData, &header, headerLength*4); 1541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru udata_writeBlock(pData, mbcsData->ucm->states.stateTable, header.countStates*1024); 1542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru udata_writeBlock(pData, mbcsData->toUFallbacks, mbcsData->countToUFallbacks*sizeof(_MBCSToUFallback)); 1543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru udata_writeBlock(pData, mbcsData->unicodeCodeUnits, mbcsData->ucm->states.countToUCodeUnits*2); 1544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru udata_writeBlock(pData, mbcsData->stage1, stage1Top*2); 1545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(mbcsData->ucm->states.maxCharLength==1) { 1546c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru udata_writeBlock(pData, mbcsData->stage2Single+stage2Start, stage2Length); 1547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1548c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru udata_writeBlock(pData, mbcsData->stage2+stage2Start, stage2Length); 1549c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 1550c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(!(header.options&MBCS_OPT_NO_FROM_U)) { 1551c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru udata_writeBlock(pData, mbcsData->fromUBytes, mbcsData->stage3Top); 1552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(stageUTF8Length>0) { 1555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru udata_writeBlock(pData, mbcsData->stageUTF8, stageUTF8Length*2); 1556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* return the number of bytes that should have been written */ 1559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return top; 1560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1561