1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru****************************************************************************** 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* Copyright (C) 2003-2009, International Business Machines 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru****************************************************************************** 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* file name: ucnv_ext.c 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* encoding: US-ASCII 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* tab size: 8 (not used) 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* indentation:4 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created on: 2003jun13 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created by: Markus W. Scherer 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Conversion extensions 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uset.h" 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_bld.h" 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_cnv.h" 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_ext.h" 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h" 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* to Unicode --------------------------------------------------------------- */ 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return lookup value for the byte, if found; else 0 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic U_INLINE uint32_t 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extFindToU(const uint32_t *toUSection, int32_t length, uint8_t byte) { 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t word0, word; 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, start, limit; 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* check the input byte against the lowest and highest section bytes */ 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru start=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[0]); 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru limit=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[length-1]); 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(byte<start || limit<byte) { 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; /* the byte is out of range */ 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length==((limit-start)+1)) { 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* direct access on a linear array */ 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return UCNV_EXT_TO_U_GET_VALUE(toUSection[byte-start]); /* could be 0 */ 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* word0 is suitable for <=toUSection[] comparison, word for <toUSection[] */ 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru word0=UCNV_EXT_TO_U_MAKE_WORD(byte, 0); 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Shift byte once instead of each section word and add 0xffffff. 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * We will compare the shifted/added byte (bbffffff) against 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * section words which have byte values in the same bit position. 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If and only if byte bb < section byte ss then bbffffff<ssvvvvvv 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * for all v=0..f 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * so we need not mask off the lower 24 bits of each section word. 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru word=word0|UCNV_EXT_TO_U_VALUE_MASK; 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* binary search */ 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru start=0; 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru limit=length; 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(;;) { 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=limit-start; 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(i<=1) { 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; /* done */ 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* start<limit-1 */ 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(i<=4) { 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* linear search for the last part */ 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(word0<=toUSection[start]) { 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(++start<limit && word0<=toUSection[start]) { 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(++start<limit && word0<=toUSection[start]) { 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* always break at start==limit-1 */ 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++start; 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=(start+limit)/2; 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(word<toUSection[i]) { 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru limit=i; 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru start=i; 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* did we really find it? */ 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(start<limit && byte==UCNV_EXT_TO_U_GET_BYTE(word=toUSection[start])) { 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return UCNV_EXT_TO_U_GET_VALUE(word); /* never 0 */ 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; /* not found */ 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * TRUE if not an SI/SO stateful converter, 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or if the match length fits with the current converter state 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, match) \ 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((sisoState)<0 || ((sisoState)==0) == (match==1)) 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * this works like ucnv_extMatchFromU() except 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - the first character is in pre 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - no trie is used 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - the returned matchLength is not offset by 2 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extMatchToU(const int32_t *cx, int8_t sisoState, 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *pre, int32_t preLength, 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *src, int32_t srcLength, 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t *pMatchValue, 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool useFallback, UBool flush) { 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint32_t *toUTable, *toUSection; 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t value, matchValue; 12885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t i, j, idx, length, matchLength; 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t b; 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cx==NULL || cx[UCNV_EXT_TO_U_LENGTH]<=0) { 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; /* no extension data, no match */ 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* initialize */ 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toUTable=UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_INDEX, uint32_t); 13785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho idx=0; 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchValue=0; 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=j=matchLength=0; 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(sisoState==0) { 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* SBCS state of an SI/SO stateful converter, look at only exactly 1 byte */ 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(preLength>1) { 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; /* no match of a DBCS sequence in SBCS mode */ 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(preLength==1) { 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru srcLength=0; 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* preLength==0 */ { 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(srcLength>1) { 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru srcLength=1; 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru flush=TRUE; 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* we must not remember fallback matches when not using fallbacks */ 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* match input units until there is a full match or the input is consumed */ 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(;;) { 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* go to the next section */ 16185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho toUSection=toUTable+idx; 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* read first pair of the section */ 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=*toUSection++; 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=UCNV_EXT_TO_U_GET_BYTE(value); 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=UCNV_EXT_TO_U_GET_VALUE(value); 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( value!=0 && 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) || 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TO_U_USE_FALLBACK(useFallback)) && 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j) 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* remember longest match so far */ 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchValue=value; 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchLength=i+j; 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* match pre[] then src[] */ 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(i<preLength) { 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru b=(uint8_t)pre[i++]; 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(j<srcLength) { 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru b=(uint8_t)src[j++]; 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* all input consumed, partial match */ 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(flush || (length=(i+j))>UCNV_EXT_MAX_BYTES) { 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * end of the entire input stream, stop with the longest match so far 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or: partial match must not be longer than UCNV_EXT_MAX_BYTES 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * because it must fit into state buffers 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue with more input next time */ 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -length; 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* search for the current UChar */ 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=ucnv_extFindToU(toUSection, length, b); 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(value==0) { 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no match here, stop with the longest match so far */ 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UCNV_EXT_TO_U_IS_PARTIAL(value)) { 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* partial match, continue */ 20585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho idx=(int32_t)UCNV_EXT_TO_U_GET_PARTIAL_INDEX(value); 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) || 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TO_U_USE_FALLBACK(useFallback)) && 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j) 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* full match, stop with result */ 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchValue=value; 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchLength=i+j; 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* full match on fallback not taken, stop with the longest match so far */ 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(matchLength==0) { 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no match at all */ 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* return result */ 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pMatchValue=UCNV_EXT_TO_U_MASK_ROUNDTRIP(matchValue); 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return matchLength; 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic U_INLINE void 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extWriteToU(UConverter *cnv, const int32_t *cx, 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t value, 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar **target, const UChar *targetLimit, 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t **offsets, int32_t srcIndex, 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output the result */ 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) { 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output a single code point */ 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_toUWriteCodePoint( 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv, UCNV_EXT_TO_U_GET_CODE_POINT(value), 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target, targetLimit, 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets, srcIndex, 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output a string - with correct data we have resultLength>0 */ 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_toUWriteUChars( 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv, 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_UCHARS_INDEX, UChar)+ 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_EXT_TO_U_GET_INDEX(value), 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_EXT_TO_U_GET_LENGTH(value), 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target, targetLimit, 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets, srcIndex, 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * get the SI/SO toU state (state 0 is for SBCS, 1 for DBCS), 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or 1 for DBCS-only, 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or -1 if the converter is not SI/SO stateful 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Note: For SI/SO stateful converters getting here, 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * cnv->mode==0 is equivalent to firstLength==1. 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define UCNV_SISO_STATE(cnv) \ 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO ? (int8_t)(cnv)->mode : \ 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? 1 : -1) 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * target<targetLimit; set error code for overflow 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC UBool 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extInitialMatchToU(UConverter *cnv, const int32_t *cx, 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t firstLength, 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char **src, const char *srcLimit, 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar **target, const UChar *targetLimit, 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t **offsets, int32_t srcIndex, 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool flush, 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 28285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t match; 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* try to match */ 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru match=ucnv_extMatchToU(cx, (int8_t)UCNV_SISO_STATE(cnv), 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (const char *)cnv->toUBytes, firstLength, 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *src, (int32_t)(srcLimit-*src), 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &value, 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->useFallback, flush); 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(match>0) { 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* advance src pointer for the consumed input */ 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *src+=match-firstLength; 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write result to target */ 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_extWriteToU(cnv, cx, 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value, 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target, targetLimit, 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets, srcIndex, 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(match<0) { 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* save state for partial match */ 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *s; 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t j; 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* copy the first code point */ 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s=(const char *)cnv->toUBytes; 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preToUFirstLength=(int8_t)firstLength; 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(j=0; j<firstLength; ++j) { 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preToU[j]=*s++; 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* now copy the newly consumed input */ 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s=*src; 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru match=-match; 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(; j<match; ++j) { 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preToU[j]=*s++; 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *src=s; /* same as *src=srcLimit; because we reached the end of input */ 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preToULength=(int8_t)match; 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* match==0 no match */ { 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC UChar32 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extSimpleMatchToU(const int32_t *cx, 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *source, int32_t length, 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool useFallback) { 33285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t match; 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length<=0) { 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0xffff; 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* try to match */ 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru match=ucnv_extMatchToU(cx, -1, 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source, length, 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 0, 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &value, 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback, TRUE); 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(match==length) { 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write result for simple, single-character conversion */ 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) { 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return UCNV_EXT_TO_U_GET_CODE_POINT(value); 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * return no match because 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - match>0 && value points to string: simple conversion cannot handle multiple code points 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - match>0 && match!=length: not all input consumed, forbidden for this function 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - match==0: no match found in the first place 357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - match<0: partial match, not supported for simple conversion (and flush==TRUE) 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0xfffe; 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * continue partial match with new input 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * never called for simple, single-character conversion 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extContinueMatchToU(UConverter *cnv, 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterToUnicodeArgs *pArgs, int32_t srcIndex, 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 37085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t match, length; 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru match=ucnv_extMatchToU(cnv->sharedData->mbcs.extIndexes, (int8_t)UCNV_SISO_STATE(cnv), 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preToU, cnv->preToULength, 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source), 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &value, 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->useFallback, pArgs->flush); 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(match>0) { 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(match>=cnv->preToULength) { 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* advance src pointer for the consumed input */ 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source+=match-cnv->preToULength; 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preToULength=0; 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the match did not use all of preToU[] - keep the rest for replay */ 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=cnv->preToULength-match; 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memmove(cnv->preToU, cnv->preToU+match, length); 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preToULength=(int8_t)-length; 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write result */ 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_extWriteToU(cnv, cnv->sharedData->mbcs.extIndexes, 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value, 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &pArgs->target, pArgs->targetLimit, 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &pArgs->offsets, srcIndex, 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(match<0) { 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* save state for partial match */ 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *s; 399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t j; 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* just _append_ the newly consumed input to preToU[] */ 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s=pArgs->source; 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru match=-match; 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(j=cnv->preToULength; j<match; ++j) { 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preToU[j]=*s++; 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */ 408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preToULength=(int8_t)match; 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* match==0 */ { 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * no match 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * We need to split the previous input into two parts: 414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1. The first codepage character is unmappable - that's how we got into 416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * trying the extension data in the first place. 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * We need to move it from the preToU buffer 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to the error buffer, set an error code, 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and prepare the rest of the previous input for 2. 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 2. The rest of the previous input must be converted once we 422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * come back from the callback for the first character. 423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * At that time, we have to try again from scratch to convert 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * these input characters. 425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The replay will be handled by the ucnv.c conversion code. 426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* move the first codepage character to the error field */ 429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(cnv->toUBytes, cnv->preToU, cnv->preToUFirstLength); 430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->toULength=cnv->preToUFirstLength; 431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* move the rest up inside the buffer */ 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=cnv->preToULength-cnv->preToUFirstLength; 434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length>0) { 435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memmove(cnv->preToU, cnv->preToU+cnv->preToUFirstLength, length); 436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* mark preToU for replay */ 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preToULength=(int8_t)-length; 440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the error code for unassigned */ 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_INVALID_CHAR_FOUND; 443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* from Unicode ------------------------------------------------------------- */ 447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return index of the UChar, if found; else <0 450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic U_INLINE int32_t 452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extFindFromU(const UChar *fromUSection, int32_t length, UChar u) { 453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, start, limit; 454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* binary search */ 456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru start=0; 457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru limit=length; 458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(;;) { 459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=limit-start; 460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(i<=1) { 461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; /* done */ 462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* start<limit-1 */ 464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(i<=4) { 466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* linear search for the last part */ 467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(u<=fromUSection[start]) { 468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(++start<limit && u<=fromUSection[start]) { 471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(++start<limit && u<=fromUSection[start]) { 474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* always break at start==limit-1 */ 477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++start; 478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=(start+limit)/2; 482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(u<fromUSection[i]) { 483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru limit=i; 484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru start=i; 486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* did we really find it? */ 490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(start<limit && u==fromUSection[start]) { 491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return start; 492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -1; /* not found */ 494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param cx pointer to extension data; if NULL, returns 0 499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param firstCP the first code point before all the other UChars 500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param pre UChars that must match; !initialMatch: partial match with them 501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param preLength length of pre, >=0 502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param src UChars that can be used to complete a match 503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param srcLength length of src, >=0 504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param pMatchValue [out] output result value for the match from the data structure 505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param useFallback "use fallback" flag, usually from cnv->useFallback 506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param flush TRUE if the end of the input stream is reached 507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return >1: matched, return value=total match length (number of input units matched) 508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1: matched, no mapping but request for <subchar1> 509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (only for the first code point) 510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 0: no match 511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <0: partial match, return value=negative total match length 512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (partial matches are never returned for flush==TRUE) 513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (partial matches are never returned as being longer than UCNV_EXT_MAX_UCHARS) 514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the matchLength is 2 if only firstCP matched, and >2 if firstCP and 515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * further code units matched 516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t 518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extMatchFromU(const int32_t *cx, 519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 firstCP, 520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *pre, int32_t preLength, 521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *src, int32_t srcLength, 522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t *pMatchValue, 523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool useFallback, UBool flush) { 524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint16_t *stage12, *stage3; 525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint32_t *stage3b; 526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *fromUTableUChars, *fromUSectionUChars; 528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint32_t *fromUTableValues, *fromUSectionValues; 529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t value, matchValue; 53185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t i, j, idx, length, matchLength; 532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar c; 533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cx==NULL) { 535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; /* no extension data, no match */ 536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* trie lookup of firstCP */ 53985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho idx=firstCP>>10; /* stage 1 index */ 54085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(idx>=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]) { 541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; /* the first code point is outside the trie */ 542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t); 545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t); 54685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho idx=UCNV_EXT_FROM_U(stage12, stage3, idx, firstCP); 547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t); 54985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho value=stage3b[idx]; 550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(value==0) { 551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Tests for (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0: 556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Do not interpret values with reserved bits used, for forward compatibility, 557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and do not even remember intermediate results with reserved bits used. 558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UCNV_EXT_TO_U_IS_PARTIAL(value)) { 561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* partial match, enter the loop below */ 56285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value); 563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* initialize */ 565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fromUTableUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar); 566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fromUTableValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t); 567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchValue=0; 569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i=j=matchLength=0; 570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* we must not remember fallback matches when not using fallbacks */ 572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* match input units until there is a full match or the input is consumed */ 574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(;;) { 575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* go to the next section */ 57685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho fromUSectionUChars=fromUTableUChars+idx; 57785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho fromUSectionValues=fromUTableValues+idx; 578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* read first pair of the section */ 580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=*fromUSectionUChars++; 581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=*fromUSectionValues++; 582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( value!=0 && 583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) || 584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FROM_U_USE_FALLBACK(useFallback, firstCP)) && 585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* remember longest match so far */ 588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchValue=value; 589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchLength=2+i+j; 590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* match pre[] then src[] */ 593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(i<preLength) { 594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=pre[i++]; 595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(j<srcLength) { 596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=src[j++]; 597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* all input consumed, partial match */ 599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(flush || (length=(i+j))>UCNV_EXT_MAX_UCHARS) { 600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * end of the entire input stream, stop with the longest match so far 602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * or: partial match must not be longer than UCNV_EXT_MAX_UCHARS 603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * because it must fit into state buffers 604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* continue with more input next time */ 608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return -(2+length); 609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* search for the current UChar */ 61385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho idx=ucnv_extFindFromU(fromUSectionUChars, length, c); 61485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(idx<0) { 615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no match here, stop with the longest match so far */ 616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 61885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho value=fromUSectionValues[idx]; 619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { 620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* partial match, continue */ 62185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value); 622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) || 624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FROM_U_USE_FALLBACK(useFallback, firstCP)) && 625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* full match, stop with result */ 628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchValue=value; 629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchLength=2+i+j; 630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* full match on fallback not taken, stop with the longest match so far */ 632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(matchLength==0) { 639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no match at all */ 640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* result from firstCP trie lookup */ { 643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) || 644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FROM_U_USE_FALLBACK(useFallback, firstCP)) && 645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* full match, stop with result */ 648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchValue=value; 649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru matchLength=2; 650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fallback not taken */ 652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* return result */ 657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(matchValue==UCNV_EXT_FROM_U_SUBCHAR1) { 658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 1; /* assert matchLength==2 */ 659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pMatchValue=matchValue; 662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return matchLength; 663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param value fromUnicode mapping table value; ignores roundtrip and reserved bits 667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic U_INLINE void 669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extWriteFromU(UConverter *cnv, const int32_t *cx, 670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t value, 671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char **target, const char *targetLimit, 672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t **offsets, int32_t srcIndex, 673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t buffer[1+UCNV_EXT_MAX_BYTES]; 675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *result; 676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length, prevLength; 677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=UCNV_EXT_FROM_U_GET_LENGTH(value); 679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value); 680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* output the result */ 682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) { 683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Generate a byte array and then write it below. 685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This is not the fastest possible way, but it should be ok for 686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * extension mappings, and it is much simpler. 687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Offset and overflow handling are only done once this way. 688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t *p=buffer+1; /* reserve buffer[0] for shiftByte below */ 690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(length) { 691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 3: 692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++=(uint8_t)(value>>16); 693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 2: 694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++=(uint8_t)(value>>8); 695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 1: 696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *p++=(uint8_t)value; 697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; /* will never occur */ 699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result=buffer+1; 701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value; 703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* with correct data we have length>0 */ 706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((prevLength=cnv->fromUnicodeStatus)!=0) { 708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* handle SI/SO stateful output */ 709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint8_t shiftByte; 710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(prevLength>1 && length==1) { 712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* change from double-byte mode to single-byte */ 713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru shiftByte=(uint8_t)UCNV_SI; 714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUnicodeStatus=1; 715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(prevLength==1 && length>1) { 716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* change from single-byte mode to double-byte */ 717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru shiftByte=(uint8_t)UCNV_SO; 718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUnicodeStatus=2; 719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru shiftByte=0; 721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(shiftByte!=0) { 724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* prepend the shift byte to the result bytes */ 725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[0]=shiftByte; 726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(result!=buffer+1) { 727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(buffer+1, result, length); 728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result=buffer; 730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++length; 731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_fromUWriteBytes(cnv, (const char *)result, length, 735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target, targetLimit, 736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets, srcIndex, 737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * target<targetLimit; set error code for overflow 742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC UBool 744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extInitialMatchFromU(UConverter *cnv, const int32_t *cx, 745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 cp, 746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar **src, const UChar *srcLimit, 747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char **target, const char *targetLimit, 748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t **offsets, int32_t srcIndex, 749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool flush, 750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 75185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ 752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t match; 753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* try to match */ 755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru match=ucnv_extMatchFromU(cx, cp, 756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 0, 757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *src, (int32_t)(srcLimit-*src), 758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &value, 759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->useFallback, flush); 760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reject a match if the result is a single byte for DBCS-only */ 762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( match>=2 && 763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru !(UCNV_EXT_FROM_U_GET_LENGTH(value)==1 && 764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) 765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* advance src pointer for the consumed input */ 767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *src+=match-2; /* remove 2 for the initial code point */ 768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write result to target */ 770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_extWriteFromU(cnv, cx, 771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value, 772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target, targetLimit, 773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offsets, srcIndex, 774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(match<0) { 777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* save state for partial match */ 778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *s; 779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t j; 780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* copy the first code point */ 782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preFromUFirstCP=cp; 783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* now copy the newly consumed input */ 785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s=*src; 786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru match=-match-2; /* remove 2 for the initial code point */ 787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(j=0; j<match; ++j) { 788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preFromU[j]=*s++; 789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *src=s; /* same as *src=srcLimit; because we reached the end of input */ 791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preFromULength=(int8_t)match; 792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(match==1) { 794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* matched, no mapping but request for <subchar1> */ 795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->useSubChar1=TRUE; 796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* match==0 no match */ { 798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Used by ISO 2022 implementation. 804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return number of bytes in *pValue; negative number if fallback; 0 for no mapping 805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC int32_t 807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extSimpleMatchFromU(const int32_t *cx, 808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 cp, uint32_t *pValue, 809ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool useFallback) { 810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t value; 811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t match; 812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* try to match */ 814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru match=ucnv_extMatchFromU(cx, 815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cp, 816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 0, 817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NULL, 0, 818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &value, 819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru useFallback, TRUE); 820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(match>=2) { 821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write result for simple, single-character conversion */ 822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length; 823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int isRoundtrip; 824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 825ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isRoundtrip=UCNV_EXT_FROM_U_IS_ROUNDTRIP(value); 826ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=UCNV_EXT_FROM_U_GET_LENGTH(value); 827ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value); 828ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) { 830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pValue=value; 831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return isRoundtrip ? length : -length; 832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if 0 /* not currently used */ 833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(length==4) { 834ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* de-serialize a 4-byte result */ 835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint8_t *result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value; 836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pValue= 837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((uint32_t)result[0]<<24)| 838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((uint32_t)result[1]<<16)| 839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ((uint32_t)result[2]<<8)| 840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result[3]; 841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return isRoundtrip ? 4 : -4; 842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * return no match because 848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - match>1 && resultLength>4: result too long for simple conversion 849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - match==1: no match found, <subchar1> preferred 850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - match==0: no match found in the first place 851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - match<0: partial match, not supported for simple conversion (and flush==TRUE) 852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * continue partial match with new input, requires cnv->preFromUFirstCP>=0 858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * never called for simple, single-character conversion 859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void 861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extContinueMatchFromU(UConverter *cnv, 862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterFromUnicodeArgs *pArgs, int32_t srcIndex, 863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 86485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ 865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t match; 866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru match=ucnv_extMatchFromU(cnv->sharedData->mbcs.extIndexes, 868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preFromUFirstCP, 869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preFromU, cnv->preFromULength, 870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source), 871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &value, 872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->useFallback, pArgs->flush); 873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(match>=2) { 874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru match-=2; /* remove 2 for the initial code point */ 875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(match>=cnv->preFromULength) { 877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* advance src pointer for the consumed input */ 878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source+=match-cnv->preFromULength; 879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preFromULength=0; 880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the match did not use all of preFromU[] - keep the rest for replay */ 882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length=cnv->preFromULength-match; 883ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memmove(cnv->preFromU, cnv->preFromU+match, length*U_SIZEOF_UCHAR); 884ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preFromULength=(int8_t)-length; 885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* finish the partial match */ 888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preFromUFirstCP=U_SENTINEL; 889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write result */ 891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_extWriteFromU(cnv, cnv->sharedData->mbcs.extIndexes, 892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value, 893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &pArgs->target, pArgs->targetLimit, 894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &pArgs->offsets, srcIndex, 895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(match<0) { 897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* save state for partial match */ 898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *s; 899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t j; 900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* just _append_ the newly consumed input to preFromU[] */ 902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s=pArgs->source; 903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru match=-match-2; /* remove 2 for the initial code point */ 904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(j=cnv->preFromULength; j<match; ++j) { 905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preFromU[j]=*s++; 906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */ 908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preFromULength=(int8_t)match; 909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else /* match==0 or 1 */ { 910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * no match 912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * We need to split the previous input into two parts: 914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1. The first code point is unmappable - that's how we got into 916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * trying the extension data in the first place. 917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * We need to move it from the preFromU buffer 918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to the error buffer, set an error code, 919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and prepare the rest of the previous input for 2. 920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 2. The rest of the previous input must be converted once we 922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * come back from the callback for the first code point. 923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * At that time, we have to try again from scratch to convert 924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * these input characters. 925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The replay will be handled by the ucnv.c conversion code. 926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(match==1) { 929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* matched, no mapping but request for <subchar1> */ 930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->useSubChar1=TRUE; 931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* move the first code point to the error field */ 934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->fromUChar32=cnv->preFromUFirstCP; 935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preFromUFirstCP=U_SENTINEL; 936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* mark preFromU for replay */ 938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cnv->preFromULength=-cnv->preFromULength; 939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the error code for unassigned */ 941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *pErrorCode=U_INVALID_CHAR_FOUND; 942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData, 947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const int32_t *cx, 948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const USetAdder *sa, 949c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru UBool useFallback, 950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t minLength, 951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c, 952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar s[UCNV_EXT_MAX_UCHARS], int32_t length, 953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sectionIndex, 954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UChar *fromUSectionUChars; 956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint32_t *fromUSectionValues; 957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t value; 959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i, count; 960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fromUSectionUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar)+sectionIndex; 962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fromUSectionValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t)+sectionIndex; 963ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 964ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* read first pair of the section */ 965ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count=*fromUSectionUChars++; 966ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=*fromUSectionValues++; 967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( value!=0 && 969c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) || useFallback) && 970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength 971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c>=0) { 973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* add the initial code point */ 974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa->add(sa->set, c); 975ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 976ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* add the string so far */ 977ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa->addString(sa->set, s, length); 978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<count; ++i) { 982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* append this code unit and recurse or add the string */ 983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s[length]=fromUSectionUChars[i]; 984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=fromUSectionValues[i]; 985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(value==0) { 987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no mapping, do nothing */ 988ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { 989ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_extGetUnicodeSetString( 990c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru sharedData, cx, sa, useFallback, minLength, 991ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_SENTINEL, s, length+1, 992ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), 993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 994c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } else if((useFallback ? 995c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 : 996c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru ((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))== 997c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) && 998ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength 999ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 1000ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa->addString(sa->set, s, length+1); 1001ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1002ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1003ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1004ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1005ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void 1006ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extGetUnicodeSet(const UConverterSharedData *sharedData, 1007ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const USetAdder *sa, 1008ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterUnicodeSet which, 1009c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru UConverterSetFilter filter, 1010ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode *pErrorCode) { 1011ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const int32_t *cx; 1012ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint16_t *stage12, *stage3, *ps2, *ps3; 1013ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const uint32_t *stage3b; 1014ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1015ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t value; 1016ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t st1, stage1Length, st2, st3, minLength; 1017c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru UBool useFallback; 1018ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1019ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar s[UCNV_EXT_MAX_UCHARS]; 1020ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 1021ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length; 1022ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1023ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cx=sharedData->mbcs.extIndexes; 1024ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cx==NULL) { 1025ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 1026ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1027ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1028ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t); 1029ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t); 1030ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t); 1031ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1032ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stage1Length=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]; 1033ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1034c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET); 1035c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru 1036ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* enumerate the from-Unicode trie table */ 1037ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0; /* keep track of the current code point while enumerating */ 1038ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1039c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if(filter==UCNV_SET_FILTER_2022_CN) { 1040c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru minLength=3; 1041c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } else if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY || 1042c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru filter!=UCNV_SET_FILTER_NONE 1043c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru ) { 1044ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* DBCS-only, ignore single-byte results */ 1045ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru minLength=2; 1046ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1047ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru minLength=1; 1048ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1049ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1050ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1051ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the trie enumeration is almost the same as 1052ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * in MBCSGetUnicodeSet() for MBCS_OUTPUT_1 1053ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1054ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(st1=0; st1<stage1Length; ++st1) { 1055ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru st2=stage12[st1]; 1056ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(st2>stage1Length) { 1057ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ps2=stage12+st2; 1058ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(st2=0; st2<64; ++st2) { 1059ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((st3=(int32_t)ps2[st2]<<UCNV_EXT_STAGE_2_LEFT_SHIFT)!=0) { 1060ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* read the stage 3 block */ 1061ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ps3=stage3+st3; 1062ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1063ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 1064ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Add code points for which the roundtrip flag is set. 1065ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Do not add <subchar1> entries or other (future?) pseudo-entries 1066ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * with an output length of 0, or entries with reserved bits set. 1067ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Recurse for partial results. 1068ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 1069ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru do { 1070ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=stage3b[*ps3++]; 1071ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(value==0) { 1072ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* no mapping, do nothing */ 1073ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { 1074ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length=0; 1075ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U16_APPEND_UNSAFE(s, length, c); 1076ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_extGetUnicodeSetString( 1077c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru sharedData, cx, sa, useFallback, minLength, 1078ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c, s, length, 1079ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), 1080ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pErrorCode); 1081c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } else if((useFallback ? 1082c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 : 1083c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru ((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))== 1084c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) && 1085ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength 1086ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 1087c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru switch(filter) { 1088c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru case UCNV_SET_FILTER_2022_CN: 1089c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==3 && UCNV_EXT_FROM_U_GET_DATA(value)<=0x82ffff)) { 1090c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru continue; 1091c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } 1092c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru break; 1093c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru case UCNV_SET_FILTER_SJIS: 1094c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && (value=UCNV_EXT_FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)) { 1095c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru continue; 1096c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } 1097c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru break; 1098c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru case UCNV_SET_FILTER_GR94DBCS: 1099c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && 1100c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfefe - 0xa1a1) && 1101c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) { 1102c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru continue; 1103c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } 1104c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru break; 1105c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru case UCNV_SET_FILTER_HZ: 1106c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && 1107c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfdfe - 0xa1a1) && 1108c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) { 1109c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru continue; 1110c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } 1111c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru break; 1112c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru default: 1113c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru /* 1114c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * UCNV_SET_FILTER_NONE, 1115c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength 1116c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru */ 1117c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru break; 1118c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru } 1119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sa->add(sa->set, c); 1120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } while((++c&0xf)!=0); 1122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c+=16; /* empty stage 3 block */ 1124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 1127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c+=1024; /* empty stage 2 block */ 1128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 1130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 1131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 1132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ 1133