1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru****************************************************************************** 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 48393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius* Copyright (C) 2003-2013, International Business Machines 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru****************************************************************************** 8103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius* file name: ucnv_ext.cpp 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* encoding: US-ASCII 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* tab size: 8 (not used) 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* indentation:4 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created on: 2003jun13 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created by: Markus W. Scherer 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Conversion extensions 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uset.h" 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_bld.h" 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_cnv.h" 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_ext.h" 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h" 28103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "uassert.h" 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* to Unicode --------------------------------------------------------------- */ 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return lookup value for the byte, if found; else 0 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 35103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic inline uint32_t 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extFindToU(const uint32_t *toUSection, int32_t length, uint8_t byte) { 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t word0, word; 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, start, limit; 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* check the input byte against the lowest and highest section bytes */ 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru start=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[0]); 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru limit=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[length-1]); 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(byte<start || limit<byte) { 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; /* the byte is out of range */ 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length==((limit-start)+1)) { 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* direct access on a linear array */ 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return UCNV_EXT_TO_U_GET_VALUE(toUSection[byte-start]); /* could be 0 */ 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* word0 is suitable for <=toUSection[] comparison, word for <toUSection[] */ 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru word0=UCNV_EXT_TO_U_MAKE_WORD(byte, 0); 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Shift byte once instead of each section word and add 0xffffff. 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * We will compare the shifted/added byte (bbffffff) against 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * section words which have byte values in the same bit position. 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If and only if byte bb < section byte ss then bbffffff<ssvvvvvv 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * for all v=0..f 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * so we need not mask off the lower 24 bits of each section word. 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru word=word0|UCNV_EXT_TO_U_VALUE_MASK; 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* binary search */ 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru start=0; 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru limit=length; 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(;;) { 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=limit-start; 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(i<=1) { 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; /* done */ 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* start<limit-1 */ 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(i<=4) { 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* linear search for the last part */ 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(word0<=toUSection[start]) { 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(++start<limit && word0<=toUSection[start]) { 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(++start<limit && word0<=toUSection[start]) { 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* always break at start==limit-1 */ 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++start; 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=(start+limit)/2; 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(word<toUSection[i]) { 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru limit=i; 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru start=i; 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* did we really find it? */ 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(start<limit && byte==UCNV_EXT_TO_U_GET_BYTE(word=toUSection[start])) { 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return UCNV_EXT_TO_U_GET_VALUE(word); /* never 0 */ 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; /* not found */ 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * TRUE if not an SI/SO stateful converter, 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or if the match length fits with the current converter state 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, match) \ 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((sisoState)<0 || ((sisoState)==0) == (match==1)) 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * this works like ucnv_extMatchFromU() except 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - the first character is in pre 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - no trie is used 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - the returned matchLength is not offset by 2 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extMatchToU(const int32_t *cx, int8_t sisoState, 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *pre, int32_t preLength, 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *src, int32_t srcLength, 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t *pMatchValue, 125103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UBool /*useFallback*/, UBool flush) { 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint32_t *toUTable, *toUSection; 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t value, matchValue; 12985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t i, j, idx, length, matchLength; 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t b; 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cx==NULL || cx[UCNV_EXT_TO_U_LENGTH]<=0) { 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; /* no extension data, no match */ 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* initialize */ 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toUTable=UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_INDEX, uint32_t); 13885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho idx=0; 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchValue=0; 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=j=matchLength=0; 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(sisoState==0) { 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* SBCS state of an SI/SO stateful converter, look at only exactly 1 byte */ 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(preLength>1) { 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; /* no match of a DBCS sequence in SBCS mode */ 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(preLength==1) { 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru srcLength=0; 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* preLength==0 */ { 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(srcLength>1) { 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru srcLength=1; 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru flush=TRUE; 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* we must not remember fallback matches when not using fallbacks */ 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* match input units until there is a full match or the input is consumed */ 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(;;) { 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* go to the next section */ 16285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho toUSection=toUTable+idx; 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* read first pair of the section */ 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=*toUSection++; 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=UCNV_EXT_TO_U_GET_BYTE(value); 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=UCNV_EXT_TO_U_GET_VALUE(value); 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( value!=0 && 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) || 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TO_U_USE_FALLBACK(useFallback)) && 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j) 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* remember longest match so far */ 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchValue=value; 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchLength=i+j; 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* match pre[] then src[] */ 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(i<preLength) { 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru b=(uint8_t)pre[i++]; 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(j<srcLength) { 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru b=(uint8_t)src[j++]; 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* all input consumed, partial match */ 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(flush || (length=(i+j))>UCNV_EXT_MAX_BYTES) { 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * end of the entire input stream, stop with the longest match so far 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or: partial match must not be longer than UCNV_EXT_MAX_BYTES 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * because it must fit into state buffers 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue with more input next time */ 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -length; 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* search for the current UChar */ 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=ucnv_extFindToU(toUSection, length, b); 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(value==0) { 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no match here, stop with the longest match so far */ 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UCNV_EXT_TO_U_IS_PARTIAL(value)) { 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* partial match, continue */ 20685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho idx=(int32_t)UCNV_EXT_TO_U_GET_PARTIAL_INDEX(value); 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) || 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TO_U_USE_FALLBACK(useFallback)) && 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j) 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* full match, stop with result */ 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchValue=value; 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchLength=i+j; 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* full match on fallback not taken, stop with the longest match so far */ 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(matchLength==0) { 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no match at all */ 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* return result */ 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pMatchValue=UCNV_EXT_TO_U_MASK_ROUNDTRIP(matchValue); 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return matchLength; 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 233103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic inline void 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extWriteToU(UConverter *cnv, const int32_t *cx, 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t value, 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar **target, const UChar *targetLimit, 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t **offsets, int32_t srcIndex, 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output the result */ 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) { 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output a single code point */ 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_toUWriteCodePoint( 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv, UCNV_EXT_TO_U_GET_CODE_POINT(value), 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target, targetLimit, 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets, srcIndex, 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output a string - with correct data we have resultLength>0 */ 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_toUWriteUChars( 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv, 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_UCHARS_INDEX, UChar)+ 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_EXT_TO_U_GET_INDEX(value), 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_EXT_TO_U_GET_LENGTH(value), 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target, targetLimit, 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets, srcIndex, 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * get the SI/SO toU state (state 0 is for SBCS, 1 for DBCS), 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or 1 for DBCS-only, 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or -1 if the converter is not SI/SO stateful 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Note: For SI/SO stateful converters getting here, 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * cnv->mode==0 is equivalent to firstLength==1. 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define UCNV_SISO_STATE(cnv) \ 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO ? (int8_t)(cnv)->mode : \ 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? 1 : -1) 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * target<targetLimit; set error code for overflow 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC UBool 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extInitialMatchToU(UConverter *cnv, const int32_t *cx, 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t firstLength, 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char **src, const char *srcLimit, 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar **target, const UChar *targetLimit, 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t **offsets, int32_t srcIndex, 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool flush, 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 28385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t match; 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* try to match */ 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru match=ucnv_extMatchToU(cx, (int8_t)UCNV_SISO_STATE(cnv), 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (const char *)cnv->toUBytes, firstLength, 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *src, (int32_t)(srcLimit-*src), 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &value, 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->useFallback, flush); 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(match>0) { 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* advance src pointer for the consumed input */ 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *src+=match-firstLength; 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write result to target */ 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_extWriteToU(cnv, cx, 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value, 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target, targetLimit, 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets, srcIndex, 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(match<0) { 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* save state for partial match */ 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *s; 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t j; 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* copy the first code point */ 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s=(const char *)cnv->toUBytes; 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preToUFirstLength=(int8_t)firstLength; 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(j=0; j<firstLength; ++j) { 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preToU[j]=*s++; 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* now copy the newly consumed input */ 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s=*src; 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru match=-match; 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(; j<match; ++j) { 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preToU[j]=*s++; 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *src=s; /* same as *src=srcLimit; because we reached the end of input */ 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preToULength=(int8_t)match; 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* match==0 no match */ { 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC UChar32 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extSimpleMatchToU(const int32_t *cx, 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *source, int32_t length, 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool useFallback) { 33385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t match; 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length<=0) { 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0xffff; 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* try to match */ 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru match=ucnv_extMatchToU(cx, -1, 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source, length, 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 0, 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &value, 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback, TRUE); 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(match==length) { 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write result for simple, single-character conversion */ 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) { 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return UCNV_EXT_TO_U_GET_CODE_POINT(value); 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * return no match because 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - match>0 && value points to string: simple conversion cannot handle multiple code points 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - match>0 && match!=length: not all input consumed, forbidden for this function 357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - match==0: no match found in the first place 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - match<0: partial match, not supported for simple conversion (and flush==TRUE) 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0xfffe; 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * continue partial match with new input 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * never called for simple, single-character conversion 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extContinueMatchToU(UConverter *cnv, 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterToUnicodeArgs *pArgs, int32_t srcIndex, 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 37185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t match, length; 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru match=ucnv_extMatchToU(cnv->sharedData->mbcs.extIndexes, (int8_t)UCNV_SISO_STATE(cnv), 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preToU, cnv->preToULength, 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source), 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &value, 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->useFallback, pArgs->flush); 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(match>0) { 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(match>=cnv->preToULength) { 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* advance src pointer for the consumed input */ 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source+=match-cnv->preToULength; 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preToULength=0; 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the match did not use all of preToU[] - keep the rest for replay */ 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=cnv->preToULength-match; 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memmove(cnv->preToU, cnv->preToU+match, length); 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preToULength=(int8_t)-length; 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write result */ 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_extWriteToU(cnv, cnv->sharedData->mbcs.extIndexes, 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value, 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &pArgs->target, pArgs->targetLimit, 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &pArgs->offsets, srcIndex, 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(match<0) { 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* save state for partial match */ 399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *s; 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t j; 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* just _append_ the newly consumed input to preToU[] */ 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s=pArgs->source; 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru match=-match; 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(j=cnv->preToULength; j<match; ++j) { 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preToU[j]=*s++; 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */ 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preToULength=(int8_t)match; 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* match==0 */ { 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * no match 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * We need to split the previous input into two parts: 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1. The first codepage character is unmappable - that's how we got into 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * trying the extension data in the first place. 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * We need to move it from the preToU buffer 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to the error buffer, set an error code, 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and prepare the rest of the previous input for 2. 421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 2. The rest of the previous input must be converted once we 423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * come back from the callback for the first character. 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * At that time, we have to try again from scratch to convert 425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * these input characters. 426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The replay will be handled by the ucnv.c conversion code. 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* move the first codepage character to the error field */ 430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(cnv->toUBytes, cnv->preToU, cnv->preToUFirstLength); 431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=cnv->preToUFirstLength; 432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* move the rest up inside the buffer */ 434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=cnv->preToULength-cnv->preToUFirstLength; 435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length>0) { 436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memmove(cnv->preToU, cnv->preToU+cnv->preToUFirstLength, length); 437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* mark preToU for replay */ 440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preToULength=(int8_t)-length; 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the error code for unassigned */ 443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_INVALID_CHAR_FOUND; 444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* from Unicode ------------------------------------------------------------- */ 448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 4498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius// Use roundtrips, "good one-way" mappings, and some normal fallbacks. 4508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusstatic inline UBool 4518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusextFromUUseMapping(UBool useFallback, uint32_t value, UChar32 firstCP) { 4528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return 4538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius ((value&UCNV_EXT_FROM_U_STATUS_MASK)!=0 || 4548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius FROM_U_USE_FALLBACK(useFallback, firstCP)) && 4558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0; 4568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 4578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return index of the UChar, if found; else <0 460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 461103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic inline int32_t 462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extFindFromU(const UChar *fromUSection, int32_t length, UChar u) { 463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, start, limit; 464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* binary search */ 466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru start=0; 467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru limit=length; 468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(;;) { 469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=limit-start; 470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(i<=1) { 471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; /* done */ 472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* start<limit-1 */ 474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(i<=4) { 476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* linear search for the last part */ 477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(u<=fromUSection[start]) { 478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(++start<limit && u<=fromUSection[start]) { 481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(++start<limit && u<=fromUSection[start]) { 484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* always break at start==limit-1 */ 487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++start; 488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=(start+limit)/2; 492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(u<fromUSection[i]) { 493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru limit=i; 494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru start=i; 496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* did we really find it? */ 500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(start<limit && u==fromUSection[start]) { 501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return start; 502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -1; /* not found */ 504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param cx pointer to extension data; if NULL, returns 0 509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param firstCP the first code point before all the other UChars 510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param pre UChars that must match; !initialMatch: partial match with them 511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param preLength length of pre, >=0 512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param src UChars that can be used to complete a match 513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param srcLength length of src, >=0 514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param pMatchValue [out] output result value for the match from the data structure 515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param useFallback "use fallback" flag, usually from cnv->useFallback 516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param flush TRUE if the end of the input stream is reached 517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return >1: matched, return value=total match length (number of input units matched) 518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1: matched, no mapping but request for <subchar1> 519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (only for the first code point) 520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 0: no match 521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <0: partial match, return value=negative total match length 522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (partial matches are never returned for flush==TRUE) 523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (partial matches are never returned as being longer than UCNV_EXT_MAX_UCHARS) 524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the matchLength is 2 if only firstCP matched, and >2 if firstCP and 525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * further code units matched 526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t 528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extMatchFromU(const int32_t *cx, 529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 firstCP, 530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *pre, int32_t preLength, 531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *src, int32_t srcLength, 532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t *pMatchValue, 533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool useFallback, UBool flush) { 534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint16_t *stage12, *stage3; 535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint32_t *stage3b; 536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *fromUTableUChars, *fromUSectionUChars; 538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint32_t *fromUTableValues, *fromUSectionValues; 539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t value, matchValue; 54185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t i, j, idx, length, matchLength; 542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar c; 543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cx==NULL) { 545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; /* no extension data, no match */ 546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* trie lookup of firstCP */ 54985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho idx=firstCP>>10; /* stage 1 index */ 55085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(idx>=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]) { 551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; /* the first code point is outside the trie */ 552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t); 555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t); 55685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho idx=UCNV_EXT_FROM_U(stage12, stage3, idx, firstCP); 557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t); 55985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho value=stage3b[idx]; 560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(value==0) { 561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Tests for (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0: 566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Do not interpret values with reserved bits used, for forward compatibility, 567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and do not even remember intermediate results with reserved bits used. 568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UCNV_EXT_TO_U_IS_PARTIAL(value)) { 571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* partial match, enter the loop below */ 57285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value); 573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* initialize */ 575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fromUTableUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar); 576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fromUTableValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t); 577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchValue=0; 579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=j=matchLength=0; 580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* we must not remember fallback matches when not using fallbacks */ 582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* match input units until there is a full match or the input is consumed */ 584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(;;) { 585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* go to the next section */ 58685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho fromUSectionUChars=fromUTableUChars+idx; 58785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho fromUSectionValues=fromUTableValues+idx; 588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* read first pair of the section */ 590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=*fromUSectionUChars++; 591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=*fromUSectionValues++; 5928393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if(value!=0 && extFromUUseMapping(useFallback, value, firstCP)) { 593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* remember longest match so far */ 594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchValue=value; 595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchLength=2+i+j; 596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* match pre[] then src[] */ 599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(i<preLength) { 600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=pre[i++]; 601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(j<srcLength) { 602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=src[j++]; 603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* all input consumed, partial match */ 605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(flush || (length=(i+j))>UCNV_EXT_MAX_UCHARS) { 606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * end of the entire input stream, stop with the longest match so far 608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or: partial match must not be longer than UCNV_EXT_MAX_UCHARS 609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * because it must fit into state buffers 610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue with more input next time */ 614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -(2+length); 615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* search for the current UChar */ 61985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho idx=ucnv_extFindFromU(fromUSectionUChars, length, c); 62085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(idx<0) { 621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no match here, stop with the longest match so far */ 622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 62485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho value=fromUSectionValues[idx]; 625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { 626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* partial match, continue */ 62785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value); 628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 6298393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if(extFromUUseMapping(useFallback, value, firstCP)) { 630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* full match, stop with result */ 631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchValue=value; 632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchLength=2+i+j; 633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* full match on fallback not taken, stop with the longest match so far */ 635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(matchLength==0) { 642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no match at all */ 643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* result from firstCP trie lookup */ { 6468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if(extFromUUseMapping(useFallback, value, firstCP)) { 647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* full match, stop with result */ 648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchValue=value; 649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchLength=2; 650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fallback not taken */ 652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* return result */ 657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(matchValue==UCNV_EXT_FROM_U_SUBCHAR1) { 658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 1; /* assert matchLength==2 */ 659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pMatchValue=matchValue; 662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return matchLength; 663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param value fromUnicode mapping table value; ignores roundtrip and reserved bits 667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 668103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic inline void 669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extWriteFromU(UConverter *cnv, const int32_t *cx, 670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t value, 671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char **target, const char *targetLimit, 672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t **offsets, int32_t srcIndex, 673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t buffer[1+UCNV_EXT_MAX_BYTES]; 675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *result; 676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length, prevLength; 677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=UCNV_EXT_FROM_U_GET_LENGTH(value); 679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value); 680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output the result */ 682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) { 683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Generate a byte array and then write it below. 685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This is not the fastest possible way, but it should be ok for 686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * extension mappings, and it is much simpler. 687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Offset and overflow handling are only done once this way. 688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *p=buffer+1; /* reserve buffer[0] for shiftByte below */ 690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(length) { 691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 3: 692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++=(uint8_t)(value>>16); 693103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case 2: /*fall through*/ 694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++=(uint8_t)(value>>8); 695103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case 1: /*fall through*/ 696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++=(uint8_t)value; 697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; /* will never occur */ 699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result=buffer+1; 701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value; 703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* with correct data we have length>0 */ 706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((prevLength=cnv->fromUnicodeStatus)!=0) { 708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* handle SI/SO stateful output */ 709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t shiftByte; 710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(prevLength>1 && length==1) { 712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* change from double-byte mode to single-byte */ 713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru shiftByte=(uint8_t)UCNV_SI; 714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUnicodeStatus=1; 715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(prevLength==1 && length>1) { 716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* change from single-byte mode to double-byte */ 717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru shiftByte=(uint8_t)UCNV_SO; 718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUnicodeStatus=2; 719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru shiftByte=0; 721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(shiftByte!=0) { 724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* prepend the shift byte to the result bytes */ 725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[0]=shiftByte; 726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(result!=buffer+1) { 727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(buffer+1, result, length); 728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result=buffer; 730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++length; 731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_fromUWriteBytes(cnv, (const char *)result, length, 735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target, targetLimit, 736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets, srcIndex, 737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * target<targetLimit; set error code for overflow 742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC UBool 744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extInitialMatchFromU(UConverter *cnv, const int32_t *cx, 745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 cp, 746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar **src, const UChar *srcLimit, 747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char **target, const char *targetLimit, 748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t **offsets, int32_t srcIndex, 749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool flush, 750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 75185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ 752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t match; 753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* try to match */ 755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru match=ucnv_extMatchFromU(cx, cp, 756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 0, 757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *src, (int32_t)(srcLimit-*src), 758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &value, 759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->useFallback, flush); 760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reject a match if the result is a single byte for DBCS-only */ 762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( match>=2 && 763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru !(UCNV_EXT_FROM_U_GET_LENGTH(value)==1 && 764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) 765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* advance src pointer for the consumed input */ 767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *src+=match-2; /* remove 2 for the initial code point */ 768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write result to target */ 770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_extWriteFromU(cnv, cx, 771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value, 772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target, targetLimit, 773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets, srcIndex, 774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(match<0) { 777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* save state for partial match */ 778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *s; 779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t j; 780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* copy the first code point */ 782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preFromUFirstCP=cp; 783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* now copy the newly consumed input */ 785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s=*src; 786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru match=-match-2; /* remove 2 for the initial code point */ 787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(j=0; j<match; ++j) { 788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preFromU[j]=*s++; 789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *src=s; /* same as *src=srcLimit; because we reached the end of input */ 791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preFromULength=(int8_t)match; 792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(match==1) { 794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* matched, no mapping but request for <subchar1> */ 795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->useSubChar1=TRUE; 796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* match==0 no match */ { 798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Used by ISO 2022 implementation. 804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return number of bytes in *pValue; negative number if fallback; 0 for no mapping 805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC int32_t 807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extSimpleMatchFromU(const int32_t *cx, 808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 cp, uint32_t *pValue, 809ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool useFallback) { 810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t value; 811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t match; 812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* try to match */ 814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru match=ucnv_extMatchFromU(cx, 815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cp, 816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 0, 817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 0, 818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &value, 819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback, TRUE); 820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(match>=2) { 821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write result for simple, single-character conversion */ 822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length; 823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int isRoundtrip; 824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 825ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isRoundtrip=UCNV_EXT_FROM_U_IS_ROUNDTRIP(value); 826ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=UCNV_EXT_FROM_U_GET_LENGTH(value); 827ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value); 828ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) { 830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pValue=value; 831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return isRoundtrip ? length : -length; 832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if 0 /* not currently used */ 833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(length==4) { 834ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* de-serialize a 4-byte result */ 835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value; 836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pValue= 837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((uint32_t)result[0]<<24)| 838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((uint32_t)result[1]<<16)| 839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((uint32_t)result[2]<<8)| 840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result[3]; 841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return isRoundtrip ? 4 : -4; 842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * return no match because 848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - match>1 && resultLength>4: result too long for simple conversion 849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - match==1: no match found, <subchar1> preferred 850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - match==0: no match found in the first place 851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - match<0: partial match, not supported for simple conversion (and flush==TRUE) 852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * continue partial match with new input, requires cnv->preFromUFirstCP>=0 858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * never called for simple, single-character conversion 859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void 861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extContinueMatchFromU(UConverter *cnv, 862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterFromUnicodeArgs *pArgs, int32_t srcIndex, 863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 86485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ 865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t match; 866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru match=ucnv_extMatchFromU(cnv->sharedData->mbcs.extIndexes, 868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preFromUFirstCP, 869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preFromU, cnv->preFromULength, 870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source), 871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &value, 872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->useFallback, pArgs->flush); 873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(match>=2) { 874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru match-=2; /* remove 2 for the initial code point */ 875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(match>=cnv->preFromULength) { 877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* advance src pointer for the consumed input */ 878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source+=match-cnv->preFromULength; 879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preFromULength=0; 880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the match did not use all of preFromU[] - keep the rest for replay */ 882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length=cnv->preFromULength-match; 883ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memmove(cnv->preFromU, cnv->preFromU+match, length*U_SIZEOF_UCHAR); 884ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preFromULength=(int8_t)-length; 885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* finish the partial match */ 888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preFromUFirstCP=U_SENTINEL; 889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write result */ 891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_extWriteFromU(cnv, cnv->sharedData->mbcs.extIndexes, 892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value, 893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &pArgs->target, pArgs->targetLimit, 894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &pArgs->offsets, srcIndex, 895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(match<0) { 897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* save state for partial match */ 898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *s; 899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t j; 900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* just _append_ the newly consumed input to preFromU[] */ 902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s=pArgs->source; 903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru match=-match-2; /* remove 2 for the initial code point */ 904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(j=cnv->preFromULength; j<match; ++j) { 905103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius U_ASSERT(j>=0); 906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preFromU[j]=*s++; 907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */ 909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preFromULength=(int8_t)match; 910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* match==0 or 1 */ { 911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * no match 913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * We need to split the previous input into two parts: 915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1. The first code point is unmappable - that's how we got into 917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * trying the extension data in the first place. 918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * We need to move it from the preFromU buffer 919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to the error buffer, set an error code, 920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and prepare the rest of the previous input for 2. 921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 2. The rest of the previous input must be converted once we 923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * come back from the callback for the first code point. 924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * At that time, we have to try again from scratch to convert 925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * these input characters. 926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The replay will be handled by the ucnv.c conversion code. 927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(match==1) { 930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* matched, no mapping but request for <subchar1> */ 931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->useSubChar1=TRUE; 932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* move the first code point to the error field */ 935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=cnv->preFromUFirstCP; 936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preFromUFirstCP=U_SENTINEL; 937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* mark preFromU for replay */ 939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preFromULength=-cnv->preFromULength; 940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the error code for unassigned */ 942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_INVALID_CHAR_FOUND; 943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 9468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusstatic UBool 9478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusextSetUseMapping(UConverterUnicodeSet which, int32_t minLength, uint32_t value) { 9488393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if(which==UCNV_ROUNDTRIP_SET) { 9498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // Add only code points for which the roundtrip flag is set. 9508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // Do not add any fallbacks, even if ucnv_fromUnicode() would use them 9518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // (fallbacks from PUA). See the API docs for ucnv_getUnicodeSet(). 9528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // 9538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // By analogy, also do not add "good one-way" mappings. 9548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // 9558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // Do not add entries with reserved bits set. 9568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))!= 9578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) { 9588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return FALSE; 9598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 9608393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ { 9618393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // Do not add entries with reserved bits set. 9628393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if((value&UCNV_EXT_FROM_U_RESERVED_MASK)!=0) { 9638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return FALSE; 9648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 9658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 9668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // Do not add <subchar1> entries or other (future?) pseudo-entries 9678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // with an output length of 0. 9688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength; 9698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 9708393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData, 973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const int32_t *cx, 974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const USetAdder *sa, 9758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius UConverterUnicodeSet which, 976ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t minLength, 9778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius UChar32 firstCP, 978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar s[UCNV_EXT_MAX_UCHARS], int32_t length, 979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sectionIndex, 980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *fromUSectionUChars; 982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint32_t *fromUSectionValues; 983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t value; 985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, count; 986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fromUSectionUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar)+sectionIndex; 988ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fromUSectionValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t)+sectionIndex; 989ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 990ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* read first pair of the section */ 991ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=*fromUSectionUChars++; 992ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=*fromUSectionValues++; 993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 9948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if(extSetUseMapping(which, minLength, value)) { 9958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if(length==U16_LENGTH(firstCP)) { 996ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* add the initial code point */ 9978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius sa->add(sa->set, firstCP); 998ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 999ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* add the string so far */ 1000ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa->addString(sa->set, s, length); 1001ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1002ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1003ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1004ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<count; ++i) { 1005ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* append this code unit and recurse or add the string */ 1006ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s[length]=fromUSectionUChars[i]; 1007ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=fromUSectionValues[i]; 1008ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1009ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(value==0) { 1010ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no mapping, do nothing */ 1011ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { 1012ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_extGetUnicodeSetString( 10138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius sharedData, cx, sa, which, minLength, 10148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius firstCP, s, length+1, 1015ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), 1016ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 10178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } else if(extSetUseMapping(which, minLength, value)) { 1018ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa->addString(sa->set, s, length+1); 1019ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1020ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1021ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1022ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1023ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void 1024ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extGetUnicodeSet(const UConverterSharedData *sharedData, 1025ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const USetAdder *sa, 1026ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterUnicodeSet which, 1027c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru UConverterSetFilter filter, 1028ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 1029ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const int32_t *cx; 1030ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint16_t *stage12, *stage3, *ps2, *ps3; 1031ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint32_t *stage3b; 1032ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1033ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t value; 1034ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t st1, stage1Length, st2, st3, minLength; 1035ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1036ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar s[UCNV_EXT_MAX_UCHARS]; 1037ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 1038ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length; 1039ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1040ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cx=sharedData->mbcs.extIndexes; 1041ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cx==NULL) { 1042ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 1043ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1044ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1045ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t); 1046ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t); 1047ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t); 1048ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1049ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stage1Length=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]; 1050ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1051ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* enumerate the from-Unicode trie table */ 1052ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; /* keep track of the current code point while enumerating */ 1053ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1054c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if(filter==UCNV_SET_FILTER_2022_CN) { 1055c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru minLength=3; 1056c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } else if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY || 1057c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru filter!=UCNV_SET_FILTER_NONE 1058c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru ) { 1059ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* DBCS-only, ignore single-byte results */ 1060ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru minLength=2; 1061ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1062ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru minLength=1; 1063ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1064ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1065ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1066ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the trie enumeration is almost the same as 1067ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * in MBCSGetUnicodeSet() for MBCS_OUTPUT_1 1068ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1069ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(st1=0; st1<stage1Length; ++st1) { 1070ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru st2=stage12[st1]; 1071ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(st2>stage1Length) { 1072ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ps2=stage12+st2; 1073ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(st2=0; st2<64; ++st2) { 1074ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((st3=(int32_t)ps2[st2]<<UCNV_EXT_STAGE_2_LEFT_SHIFT)!=0) { 1075ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* read the stage 3 block */ 1076ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ps3=stage3+st3; 1077ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1078ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru do { 1079ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=stage3b[*ps3++]; 1080ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(value==0) { 1081ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no mapping, do nothing */ 1082ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { 10838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // Recurse for partial results. 1084ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=0; 1085ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_APPEND_UNSAFE(s, length, c); 1086ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_extGetUnicodeSetString( 10878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius sharedData, cx, sa, which, minLength, 1088ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c, s, length, 1089ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), 1090ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 10918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } else if(extSetUseMapping(which, minLength, value)) { 1092c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru switch(filter) { 1093c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru case UCNV_SET_FILTER_2022_CN: 1094c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==3 && UCNV_EXT_FROM_U_GET_DATA(value)<=0x82ffff)) { 1095c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru continue; 1096c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } 1097c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru break; 1098c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru case UCNV_SET_FILTER_SJIS: 1099c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && (value=UCNV_EXT_FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)) { 1100c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru continue; 1101c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } 1102c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru break; 1103c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru case UCNV_SET_FILTER_GR94DBCS: 1104c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && 1105c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfefe - 0xa1a1) && 1106c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) { 1107c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru continue; 1108c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } 1109c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru break; 1110c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru case UCNV_SET_FILTER_HZ: 1111c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && 1112c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfdfe - 0xa1a1) && 1113c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) { 1114c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru continue; 1115c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } 1116c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru break; 1117c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru default: 1118c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru /* 1119c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * UCNV_SET_FILTER_NONE, 1120c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength 1121c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru */ 1122c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru break; 1123c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } 1124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa->add(sa->set, c); 1125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } while((++c&0xf)!=0); 1127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c+=16; /* empty stage 3 block */ 1129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c+=1024; /* empty stage 2 block */ 1133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ 1138