1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)****************************************************************************** 3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Copyright (C) 2003-2009, International Business Machines 5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Corporation and others. All Rights Reserved. 6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)****************************************************************************** 8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* file name: ucnv_ext.c 9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* encoding: US-ASCII 10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* tab size: 8 (not used) 11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* indentation:4 12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* created on: 2003jun13 14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* created by: Markus W. 
Scherer 15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* 16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)* Conversion extensions 17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)*/ 18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h" 20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION 22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uset.h" 24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "ucnv_bld.h" 25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "ucnv_cnv.h" 26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "ucnv_ext.h" 27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cmemory.h" 28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* to Unicode --------------------------------------------------------------- */ 30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return lookup value for the byte, if found; else 0 33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static U_INLINE uint32_t 35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucnv_extFindToU(const uint32_t *toUSection, int32_t length, uint8_t byte) { 36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t word0, word; 37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i, start, limit; 
38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* check the input byte against the lowest and highest section bytes */ 40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[0]); 41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) limit=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[length-1]); 42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(byte<start || limit<byte) { 43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; /* the byte is out of range */ 44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(length==((limit-start)+1)) { 47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* direct access on a linear array */ 48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return UCNV_EXT_TO_U_GET_VALUE(toUSection[byte-start]); /* could be 0 */ 49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* word0 is suitable for <=toUSection[] comparison, word for <toUSection[] */ 52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) word0=UCNV_EXT_TO_U_MAKE_WORD(byte, 0); 53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Shift byte once instead of each section word and add 0xffffff. 56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * We will compare the shifted/added byte (bbffffff) against 57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * section words which have byte values in the same bit position. 
58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * If and only if byte bb < section byte ss then bbffffff<ssvvvvvv 59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * for all v=0..f 60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * so we need not mask off the lower 24 bits of each section word. 61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) word=word0|UCNV_EXT_TO_U_VALUE_MASK; 63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* binary search */ 65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start=0; 66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) limit=length; 67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(;;) { 68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i=limit-start; 69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(i<=1) { 70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; /* done */ 71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* start<limit-1 */ 73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(i<=4) { 75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* linear search for the last part */ 76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(word0<=toUSection[start]) { 77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(++start<limit && word0<=toUSection[start]) { 80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 
82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(++start<limit && word0<=toUSection[start]) { 83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* always break at start==limit-1 */ 86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++start; 87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i=(start+limit)/2; 91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(word<toUSection[i]) { 92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) limit=i; 93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start=i; 95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* did we really find it? 
*/ 99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(start<limit && byte==UCNV_EXT_TO_U_GET_BYTE(word=toUSection[start])) { 100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return UCNV_EXT_TO_U_GET_VALUE(word); /* never 0 */ 101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; /* not found */ 103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * TRUE if not an SI/SO stateful converter, 108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * or if the match length fits with the current converter state 109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, match) \ 111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ((sisoState)<0 || ((sisoState)==0) == (match==1)) 112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * this works like ucnv_extMatchFromU() except 115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - the first character is in pre 116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - no trie is used 117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - the returned matchLength is not offset by 2 118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t 120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles)ucnv_extMatchToU(const int32_t *cx, int8_t sisoState, 121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *pre, int32_t preLength, 122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *src, int32_t srcLength, 123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t *pMatchValue, 124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool useFallback, UBool flush) { 125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint32_t *toUTable, *toUSection; 126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t value, matchValue; 128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i, j, idx, length, matchLength; 129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t b; 130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(cx==NULL || cx[UCNV_EXT_TO_U_LENGTH]<=0) { 132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; /* no extension data, no match */ 133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* initialize */ 136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) toUTable=UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_INDEX, uint32_t); 137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) idx=0; 138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) matchValue=0; 140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i=j=matchLength=0; 141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(sisoState==0) { 
143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* SBCS state of an SI/SO stateful converter, look at only exactly 1 byte */ 144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(preLength>1) { 145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; /* no match of a DBCS sequence in SBCS mode */ 146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(preLength==1) { 147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) srcLength=0; 148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else /* preLength==0 */ { 149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(srcLength>1) { 150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) srcLength=1; 151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) flush=TRUE; 154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* we must not remember fallback matches when not using fallbacks */ 157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* match input units until there is a full match or the input is consumed */ 159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(;;) { 160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* go to the next section */ 161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) toUSection=toUTable+idx; 162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* read first pair of the section */ 164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value=*toUSection++; 
165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) length=UCNV_EXT_TO_U_GET_BYTE(value); 166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value=UCNV_EXT_TO_U_GET_VALUE(value); 167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if( value!=0 && 168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) || 169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TO_U_USE_FALLBACK(useFallback)) && 170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j) 171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ) { 172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* remember longest match so far */ 173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) matchValue=value; 174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) matchLength=i+j; 175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* match pre[] then src[] */ 178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(i<preLength) { 179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) b=(uint8_t)pre[i++]; 180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(j<srcLength) { 181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) b=(uint8_t)src[j++]; 182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* all input consumed, partial match */ 184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(flush || (length=(i+j))>UCNV_EXT_MAX_BYTES) { 185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * end of the entire input stream, stop with the longest match so 
far 187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * or: partial match must not be longer than UCNV_EXT_MAX_BYTES 188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * because it must fit into state buffers 189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* continue with more input next time */ 193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -length; 194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* search for the current UChar */ 198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value=ucnv_extFindToU(toUSection, length, b); 199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(value==0) { 200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* no match here, stop with the longest match so far */ 201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(UCNV_EXT_TO_U_IS_PARTIAL(value)) { 204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* partial match, continue */ 205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) idx=(int32_t)UCNV_EXT_TO_U_GET_PARTIAL_INDEX(value); 206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if( (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) || 208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) TO_U_USE_FALLBACK(useFallback)) && 
209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j) 210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ) { 211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* full match, stop with result */ 212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) matchValue=value; 213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) matchLength=i+j; 214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* full match on fallback not taken, stop with the longest match so far */ 216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(matchLength==0) { 223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* no match at all */ 224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* return result */ 228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pMatchValue=UCNV_EXT_TO_U_MASK_ROUNDTRIP(matchValue); 229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return matchLength; 230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static U_INLINE void 
233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucnv_extWriteToU(UConverter *cnv, const int32_t *cx, 234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t value, 235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar **target, const UChar *targetLimit, 236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t **offsets, int32_t srcIndex, 237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* output the result */ 239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) { 240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* output a single code point */ 241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucnv_toUWriteCodePoint( 242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv, UCNV_EXT_TO_U_GET_CODE_POINT(value), 243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) target, targetLimit, 244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) offsets, srcIndex, 245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pErrorCode); 246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* output a string - with correct data we have resultLength>0 */ 248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucnv_toUWriteUChars( 249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv, 250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_UCHARS_INDEX, UChar)+ 251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCNV_EXT_TO_U_GET_INDEX(value), 252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCNV_EXT_TO_U_GET_LENGTH(value), 253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) target, targetLimit, 
254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) offsets, srcIndex, 255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pErrorCode); 256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * get the SI/SO toU state (state 0 is for SBCS, 1 for DBCS), 261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * or 1 for DBCS-only, 262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * or -1 if the converter is not SI/SO stateful 263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Note: For SI/SO stateful converters getting here, 265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * cnv->mode==0 is equivalent to firstLength==1. 266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#define UCNV_SISO_STATE(cnv) \ 268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ((cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO ? (int8_t)(cnv)->mode : \ 269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? 
1 : -1) 270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * target<targetLimit; set error code for overflow 273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC UBool 275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucnv_extInitialMatchToU(UConverter *cnv, const int32_t *cx, 276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t firstLength, 277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char **src, const char *srcLimit, 278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar **target, const UChar *targetLimit, 279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t **offsets, int32_t srcIndex, 280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool flush, 281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ 283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t match; 284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* try to match */ 286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) match=ucnv_extMatchToU(cx, (int8_t)UCNV_SISO_STATE(cnv), 287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (const char *)cnv->toUBytes, firstLength, 288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *src, (int32_t)(srcLimit-*src), 289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) &value, 290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->useFallback, flush); 
291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(match>0) { 292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* advance src pointer for the consumed input */ 293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *src+=match-firstLength; 294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write result to target */ 296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucnv_extWriteToU(cnv, cx, 297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value, 298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) target, targetLimit, 299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) offsets, srcIndex, 300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pErrorCode); 301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return TRUE; 302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(match<0) { 303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* save state for partial match */ 304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *s; 305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t j; 306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* copy the first code point */ 308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) s=(const char *)cnv->toUBytes; 309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->preToUFirstLength=(int8_t)firstLength; 310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(j=0; j<firstLength; ++j) { 311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->preToU[j]=*s++; 312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* now copy the newly consumed input */ 315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) s=*src; 316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) match=-match; 317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(; j<match; ++j) { 318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->preToU[j]=*s++; 319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *src=s; /* same as *src=srcLimit; because we reached the end of input */ 321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->preToULength=(int8_t)match; 322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return TRUE; 323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else /* match==0 no match */ { 324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FALSE; 325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC UChar32 329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucnv_extSimpleMatchToU(const int32_t *cx, 330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *source, int32_t length, 331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool useFallback) { 332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ 333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t match; 334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(length<=0) { 336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 
0xffff; 337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* try to match */ 340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) match=ucnv_extMatchToU(cx, -1, 341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) source, length, 342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 0, 343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) &value, 344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) useFallback, TRUE); 345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(match==length) { 346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write result for simple, single-character conversion */ 347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) { 348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return UCNV_EXT_TO_U_GET_CODE_POINT(value); 349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * return no match because 354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - match>0 && value points to string: simple conversion cannot handle multiple code points 355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - match>0 && match!=length: not all input consumed, forbidden for this function 356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - match==0: no match found in the first place 357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - match<0: partial match, not supported for simple conversion (and flush==TRUE) 
358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0xfffe; 360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * continue partial match with new input 364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * never called for simple, single-character conversion 365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC void 367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucnv_extContinueMatchToU(UConverter *cnv, 368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UConverterToUnicodeArgs *pArgs, int32_t srcIndex, 369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ 371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t match, length; 372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) match=ucnv_extMatchToU(cnv->sharedData->mbcs.extIndexes, (int8_t)UCNV_SISO_STATE(cnv), 374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->preToU, cnv->preToULength, 375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source), 376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) &value, 377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->useFallback, pArgs->flush); 378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(match>0) { 379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) if(match>=cnv->preToULength) { 380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* advance src pointer for the consumed input */ 381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pArgs->source+=match-cnv->preToULength; 382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->preToULength=0; 383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* the match did not use all of preToU[] - keep the rest for replay */ 385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) length=cnv->preToULength-match; 386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memmove(cnv->preToU, cnv->preToU+match, length); 387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->preToULength=(int8_t)-length; 388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write result */ 391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucnv_extWriteToU(cnv, cnv->sharedData->mbcs.extIndexes, 392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value, 393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) &pArgs->target, pArgs->targetLimit, 394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) &pArgs->offsets, srcIndex, 395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pErrorCode); 396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(match<0) { 397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* save state for partial match */ 398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const char *s; 399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t j; 400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* just _append_ the newly consumed input to preToU[] */ 402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) s=pArgs->source; 403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) match=-match; 404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(j=cnv->preToULength; j<match; ++j) { 405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->preToU[j]=*s++; 406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */ 408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->preToULength=(int8_t)match; 409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else /* match==0 */ { 410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * no match 412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * We need to split the previous input into two parts: 414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 1. The first codepage character is unmappable - that's how we got into 416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * trying the extension data in the first place. 417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * We need to move it from the preToU buffer 418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * to the error buffer, set an error code, 419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and prepare the rest of the previous input for 2. 420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 2. 
The rest of the previous input must be converted once we 422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * come back from the callback for the first character. 423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * At that time, we have to try again from scratch to convert 424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * these input characters. 425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The replay will be handled by the ucnv.c conversion code. 426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* move the first codepage character to the error field */ 429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(cnv->toUBytes, cnv->preToU, cnv->preToUFirstLength); 430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->toULength=cnv->preToUFirstLength; 431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* move the rest up inside the buffer */ 433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) length=cnv->preToULength-cnv->preToUFirstLength; 434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(length>0) { 435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memmove(cnv->preToU, cnv->preToU+cnv->preToUFirstLength, length); 436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* mark preToU for replay */ 439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->preToULength=(int8_t)-length; 440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* set the error code for 
unassigned */ 442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_INVALID_CHAR_FOUND; 443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* from Unicode ------------------------------------------------------------- */ 447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return index of the UChar, if found; else <0 450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static U_INLINE int32_t 452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucnv_extFindFromU(const UChar *fromUSection, int32_t length, UChar u) { 453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i, start, limit; 454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* binary search */ 456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start=0; 457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) limit=length; 458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(;;) { 459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i=limit-start; 460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(i<=1) { 461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; /* done */ 462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* start<limit-1 */ 464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(i<=4) { 
466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* linear search for the last part */ 467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(u<=fromUSection[start]) { 468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(++start<limit && u<=fromUSection[start]) { 471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(++start<limit && u<=fromUSection[start]) { 474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* always break at start==limit-1 */ 477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++start; 478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i=(start+limit)/2; 482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(u<fromUSection[i]) { 483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) limit=i; 484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) start=i; 486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* did we really find it? 
*/ 490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(start<limit && u==fromUSection[start]) { 491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return start; 492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -1; /* not found */ 494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param cx pointer to extension data; if NULL, returns 0 499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param firstCP the first code point before all the other UChars 500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param pre UChars that must match; !initialMatch: partial match with them 501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param preLength length of pre, >=0 502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param src UChars that can be used to complete a match 503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param srcLength length of src, >=0 504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param pMatchValue [out] output result value for the match from the data structure 505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param useFallback "use fallback" flag, usually from cnv->useFallback 506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param flush TRUE if the end of the input stream is reached 507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return >1: matched, return value=total match length (number of input units matched) 508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 1: matched, no 
mapping but request for <subchar1> 509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * (only for the first code point) 510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 0: no match 511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <0: partial match, return value=negative total match length 512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * (partial matches are never returned for flush==TRUE) 513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * (partial matches are never returned as being longer than UCNV_EXT_MAX_UCHARS) 514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the matchLength is 2 if only firstCP matched, and >2 if firstCP and 515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * further code units matched 516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static int32_t 518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucnv_extMatchFromU(const int32_t *cx, 519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 firstCP, 520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *pre, int32_t preLength, 521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *src, int32_t srcLength, 522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t *pMatchValue, 523f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool useFallback, UBool flush) { 524f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint16_t *stage12, *stage3; 525f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint32_t *stage3b; 526f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 527f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *fromUTableUChars, *fromUSectionUChars; 528f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const 
uint32_t *fromUTableValues, *fromUSectionValues; 529f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 530f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t value, matchValue; 531f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i, j, idx, length, matchLength; 532f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar c; 533f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 534f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(cx==NULL) { 535f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; /* no extension data, no match */ 536f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 537f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 538f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* trie lookup of firstCP */ 539f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) idx=firstCP>>10; /* stage 1 index */ 540f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(idx>=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]) { 541f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; /* the first code point is outside the trie */ 542f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 543f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 544f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t); 545f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t); 546f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) idx=UCNV_EXT_FROM_U(stage12, stage3, idx, firstCP); 547f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 548f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t); 549f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
value=stage3b[idx]; 550f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(value==0) { 551f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 552f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 553f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 554f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 555f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Tests for (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0: 556f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Do not interpret values with reserved bits used, for forward compatibility, 557f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and do not even remember intermediate results with reserved bits used. 558f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 559f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 560f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(UCNV_EXT_TO_U_IS_PARTIAL(value)) { 561f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* partial match, enter the loop below */ 562f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value); 563f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 564f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* initialize */ 565f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fromUTableUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar); 566f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fromUTableValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t); 567f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 568f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) matchValue=0; 569f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) i=j=matchLength=0; 570f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
571f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* we must not remember fallback matches when not using fallbacks */ 572f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 573f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* match input units until there is a full match or the input is consumed */ 574f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(;;) { 575f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* go to the next section */ 576f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fromUSectionUChars=fromUTableUChars+idx; 577f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fromUSectionValues=fromUTableValues+idx; 578f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 579f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* read first pair of the section */ 580f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) length=*fromUSectionUChars++; 581f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value=*fromUSectionValues++; 582f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if( value!=0 && 583f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) || 584f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) FROM_U_USE_FALLBACK(useFallback, firstCP)) && 585f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 586f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ) { 587f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* remember longest match so far */ 588f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) matchValue=value; 589f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) matchLength=2+i+j; 590f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 591f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 592f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) /* match pre[] then src[] */ 593f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(i<preLength) { 594f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c=pre[i++]; 595f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(j<srcLength) { 596f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c=src[j++]; 597f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 598f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* all input consumed, partial match */ 599f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(flush || (length=(i+j))>UCNV_EXT_MAX_UCHARS) { 600f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 601f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * end of the entire input stream, stop with the longest match so far 602f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * or: partial match must not be longer than UCNV_EXT_MAX_UCHARS 603f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * because it must fit into state buffers 604f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 605f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 606f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 607f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* continue with more input next time */ 608f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return -(2+length); 609f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 610f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 611f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 612f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* search for the current UChar */ 613f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) idx=ucnv_extFindFromU(fromUSectionUChars, length, c); 614f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
if(idx<0) { 615f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* no match here, stop with the longest match so far */ 616f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 617f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 618f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value=fromUSectionValues[idx]; 619f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { 620f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* partial match, continue */ 621f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value); 622f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 623f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if( (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) || 624f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) FROM_U_USE_FALLBACK(useFallback, firstCP)) && 625f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 626f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ) { 627f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* full match, stop with result */ 628f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) matchValue=value; 629f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) matchLength=2+i+j; 630f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 631f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* full match on fallback not taken, stop with the longest match so far */ 632f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 633f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 634f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 635f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 636f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 
637f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 638f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(matchLength==0) { 639f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* no match at all */ 640f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 641f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 642f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else /* result from firstCP trie lookup */ { 643f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if( (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) || 644f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) FROM_U_USE_FALLBACK(useFallback, firstCP)) && 645f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 646f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ) { 647f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* full match, stop with result */ 648f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) matchValue=value; 649f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) matchLength=2; 650f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 651f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* fallback not taken */ 652f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 653f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 654f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 655f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 656f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* return result */ 657f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(matchValue==UCNV_EXT_FROM_U_SUBCHAR1) { 658f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 1; /* assert matchLength==2 */ 659f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 660f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne 
(Richard Coles) 661f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pMatchValue=matchValue; 662f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return matchLength; 663f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 664f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 665f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 666f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param value fromUnicode mapping table value; ignores roundtrip and reserved bits 667f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 668f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static U_INLINE void 669f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucnv_extWriteFromU(UConverter *cnv, const int32_t *cx, 670f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t value, 671f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char **target, const char *targetLimit, 672f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t **offsets, int32_t srcIndex, 673f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 674f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t buffer[1+UCNV_EXT_MAX_BYTES]; 675f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint8_t *result; 676f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t length, prevLength; 677f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 678f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) length=UCNV_EXT_FROM_U_GET_LENGTH(value); 679f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value); 680f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 681f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* output the result */ 682f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) { 683f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 684f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Generate a byte array and then write it below. 685f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * This is not the fastest possible way, but it should be ok for 686f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * extension mappings, and it is much simpler. 687f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Offset and overflow handling are only done once this way. 688f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 689f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t *p=buffer+1; /* reserve buffer[0] for shiftByte below */ 690f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) switch(length) { 691f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 3: 692f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *p++=(uint8_t)(value>>16); 693f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 2: 694f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *p++=(uint8_t)(value>>8); 695f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case 1: 696f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *p++=(uint8_t)value; 697f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) default: 698f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; /* will never occur */ 699f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 700f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result=buffer+1; 701f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 702f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value; 703f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 
704f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 705f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* with correct data we have length>0 */ 706f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 707f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if((prevLength=cnv->fromUnicodeStatus)!=0) { 708f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* handle SI/SO stateful output */ 709f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint8_t shiftByte; 710f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 711f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(prevLength>1 && length==1) { 712f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* change from double-byte mode to single-byte */ 713f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) shiftByte=(uint8_t)UCNV_SI; 714f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->fromUnicodeStatus=1; 715f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(prevLength==1 && length>1) { 716f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* change from single-byte mode to double-byte */ 717f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) shiftByte=(uint8_t)UCNV_SO; 718f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->fromUnicodeStatus=2; 719f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 720f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) shiftByte=0; 721f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 722f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 723f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(shiftByte!=0) { 724f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* prepend the shift byte to the result bytes */ 725f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) buffer[0]=shiftByte; 
726f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(result!=buffer+1) { 727f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memcpy(buffer+1, result, length); 728f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 729f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result=buffer; 730f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ++length; 731f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 732f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 733f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 734f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucnv_fromUWriteBytes(cnv, (const char *)result, length, 735f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) target, targetLimit, 736f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) offsets, srcIndex, 737f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pErrorCode); 738f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 739f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 740f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 741f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * target<targetLimit; set error code for overflow 742f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 743f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC UBool 744f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucnv_extInitialMatchFromU(UConverter *cnv, const int32_t *cx, 745f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 cp, 746f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar **src, const UChar *srcLimit, 747f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) char **target, const char *targetLimit, 748f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t **offsets, int32_t srcIndex, 
749f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool flush, 750f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 751f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ 752f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t match; 753f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 754f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* try to match */ 755f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) match=ucnv_extMatchFromU(cx, cp, 756f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 0, 757f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *src, (int32_t)(srcLimit-*src), 758f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) &value, 759f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->useFallback, flush); 760f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 761f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* reject a match if the result is a single byte for DBCS-only */ 762f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if( match>=2 && 763f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) !(UCNV_EXT_FROM_U_GET_LENGTH(value)==1 && 764f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) 765f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ) { 766f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* advance src pointer for the consumed input */ 767f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *src+=match-2; /* remove 2 for the initial code point */ 768f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 769f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write result to target */ 770f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard 
Coles) ucnv_extWriteFromU(cnv, cx, 771f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value, 772f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) target, targetLimit, 773f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) offsets, srcIndex, 774f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pErrorCode); 775f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return TRUE; 776f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(match<0) { 777f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* save state for partial match */ 778f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *s; 779f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t j; 780f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 781f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* copy the first code point */ 782f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->preFromUFirstCP=cp; 783f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 784f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* now copy the newly consumed input */ 785f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) s=*src; 786f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) match=-match-2; /* remove 2 for the initial code point */ 787f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(j=0; j<match; ++j) { 788f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->preFromU[j]=*s++; 789f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 790f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *src=s; /* same as *src=srcLimit; because we reached the end of input */ 791f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->preFromULength=(int8_t)match; 792f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return TRUE; 
793f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(match==1) { 794f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* matched, no mapping but request for <subchar1> */ 795f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->useSubChar1=TRUE; 796f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FALSE; 797f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else /* match==0 no match */ { 798f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FALSE; 799f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 800f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 801f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 802f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 803f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Used by ISO 2022 implementation. 804f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @return number of bytes in *pValue; negative number if fallback; 0 for no mapping 805f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 806f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC int32_t 807f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucnv_extSimpleMatchFromU(const int32_t *cx, 808f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 cp, uint32_t *pValue, 809f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool useFallback) { 810f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t value; 811f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t match; 812f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 813f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* try to match */ 814f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) match=ucnv_extMatchFromU(cx, 815f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cp, 
816f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 0, 817f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) NULL, 0, 818f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) &value, 819f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) useFallback, TRUE); 820f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(match>=2) { 821f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write result for simple, single-character conversion */ 822f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t length; 823f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int isRoundtrip; 824f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 825f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) isRoundtrip=UCNV_EXT_FROM_U_IS_ROUNDTRIP(value); 826f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) length=UCNV_EXT_FROM_U_GET_LENGTH(value); 827f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value); 828f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 829f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) { 830f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pValue=value; 831f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return isRoundtrip ? 
length : -length; 832f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if 0 /* not currently used */ 833f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(length==4) { 834f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* de-serialize a 4-byte result */ 835f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint8_t *result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value; 836f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pValue= 837f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ((uint32_t)result[0]<<24)| 838f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ((uint32_t)result[1]<<16)| 839f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ((uint32_t)result[2]<<8)| 840f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result[3]; 841f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return isRoundtrip ? 4 : -4; 842f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif 843f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 844f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 845f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 846f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 847f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * return no match because 848f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - match>1 && resultLength>4: result too long for simple conversion 849f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - match==1: no match found, <subchar1> preferred 850f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - match==0: no match found in the first place 851f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * - match<0: partial match, not supported for simple conversion (and flush==TRUE) 852f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 
853f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 0; 854f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 855f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 856f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 857f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * continue partial match with new input, requires cnv->preFromUFirstCP>=0 858f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * never called for simple, single-character conversion 859f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 860f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC void 861f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucnv_extContinueMatchFromU(UConverter *cnv, 862f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UConverterFromUnicodeArgs *pArgs, int32_t srcIndex, 863f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 864f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ 865f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t match; 866f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 867f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) match=ucnv_extMatchFromU(cnv->sharedData->mbcs.extIndexes, 868f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->preFromUFirstCP, 869f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->preFromU, cnv->preFromULength, 870f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source), 871f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) &value, 872f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->useFallback, pArgs->flush); 873f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(match>=2) { 
874f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) match-=2; /* remove 2 for the initial code point */ 875f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 876f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(match>=cnv->preFromULength) { 877f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* advance src pointer for the consumed input */ 878f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pArgs->source+=match-cnv->preFromULength; 879f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->preFromULength=0; 880f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 881f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* the match did not use all of preFromU[] - keep the rest for replay */ 882f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t length=cnv->preFromULength-match; 883f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uprv_memmove(cnv->preFromU, cnv->preFromU+match, length*U_SIZEOF_UCHAR); 884f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->preFromULength=(int8_t)-length; 885f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 886f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 887f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* finish the partial match */ 888f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->preFromUFirstCP=U_SENTINEL; 889f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 890f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* write result */ 891f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucnv_extWriteFromU(cnv, cnv->sharedData->mbcs.extIndexes, 892f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value, 893f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) &pArgs->target, pArgs->targetLimit, 894f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
&pArgs->offsets, srcIndex, 895f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pErrorCode); 896f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(match<0) { 897f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* save state for partial match */ 898f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *s; 899f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t j; 900f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 901f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* just _append_ the newly consumed input to preFromU[] */ 902f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) s=pArgs->source; 903f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) match=-match-2; /* remove 2 for the initial code point */ 904f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(j=cnv->preFromULength; j<match; ++j) { 905f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->preFromU[j]=*s++; 906f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 907f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */ 908f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->preFromULength=(int8_t)match; 909f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else /* match==0 or 1 */ { 910f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 911f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * no match 912f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 913f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * We need to split the previous input into two parts: 914f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 915f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 1. 
The first code point is unmappable - that's how we got into 916f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * trying the extension data in the first place. 917f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * We need to move it from the preFromU buffer 918f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * to the error buffer, set an error code, 919f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * and prepare the rest of the previous input for 2. 920f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 921f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 2. The rest of the previous input must be converted once we 922f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * come back from the callback for the first code point. 923f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * At that time, we have to try again from scratch to convert 924f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * these input characters. 925f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The replay will be handled by the ucnv.c conversion code. 
926f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 927f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 928f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(match==1) { 929f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* matched, no mapping but request for <subchar1> */ 930f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->useSubChar1=TRUE; 931f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 932f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 933f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* move the first code point to the error field */ 934f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->fromUChar32=cnv->preFromUFirstCP; 935f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->preFromUFirstCP=U_SENTINEL; 936f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 937f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* mark preFromU for replay */ 938f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cnv->preFromULength=-cnv->preFromULength; 939f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 940f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* set the error code for unassigned */ 941f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *pErrorCode=U_INVALID_CHAR_FOUND; 942f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 943f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 944f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 945f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static void 946f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData, 947f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const int32_t *cx, 948f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const USetAdder *sa, 
949f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool useFallback, 950f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t minLength, 951f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c, 952f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar s[UCNV_EXT_MAX_UCHARS], int32_t length, 953f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t sectionIndex, 954f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 955f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const UChar *fromUSectionUChars; 956f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint32_t *fromUSectionValues; 957f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 958f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t value; 959f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t i, count; 960f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 961f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fromUSectionUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar)+sectionIndex; 962f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fromUSectionValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t)+sectionIndex; 963f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 964f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* read first pair of the section */ 965f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) count=*fromUSectionUChars++; 966f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value=*fromUSectionValues++; 967f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 968f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if( value!=0 && 969f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) || useFallback) && 
970f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength 971f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ) { 972f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(c>=0) { 973f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* add the initial code point */ 974f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sa->add(sa->set, c); 975f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 976f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* add the string so far */ 977f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sa->addString(sa->set, s, length); 978f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 979f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 980f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 981f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(i=0; i<count; ++i) { 982f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* append this code unit and recurse or add the string */ 983f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) s[length]=fromUSectionUChars[i]; 984f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value=fromUSectionValues[i]; 985f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 986f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(value==0) { 987f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* no mapping, do nothing */ 988f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { 989f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucnv_extGetUnicodeSetString( 990f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sharedData, cx, sa, useFallback, minLength, 991f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U_SENTINEL, s, length+1, 
992f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), 993f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pErrorCode); 994f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if((useFallback ? 995f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 : 996f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))== 997f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) && 998f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength 999f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ) { 1000f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sa->addString(sa->set, s, length+1); 1001f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1002f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1003f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1004f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1005f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_CFUNC void 1006f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData, 1007f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const USetAdder *sa, 1008f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UConverterUnicodeSet which, 1009f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UConverterSetFilter filter, 1010f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode *pErrorCode) { 1011f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const int32_t *cx; 1012f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint16_t *stage12, *stage3, *ps2, *ps3; 
1013f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const uint32_t *stage3b; 1014f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1015f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) uint32_t value; 1016f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t st1, stage1Length, st2, st3, minLength; 1017f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool useFallback; 1018f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1019f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar s[UCNV_EXT_MAX_UCHARS]; 1020f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c; 1021f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t length; 1022f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1023f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) cx=sharedData->mbcs.extIndexes; 1024f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(cx==NULL) { 1025f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 1026f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1027f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1028f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t); 1029f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t); 1030f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t); 1031f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1032f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) stage1Length=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]; 1033f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1034f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET); 
1035f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1036f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* enumerate the from-Unicode trie table */ 1037f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c=0; /* keep track of the current code point while enumerating */ 1038f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1039f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(filter==UCNV_SET_FILTER_2022_CN) { 1040f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) minLength=3; 1041f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY || 1042f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) filter!=UCNV_SET_FILTER_NONE 1043f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ) { 1044f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* DBCS-only, ignore single-byte results */ 1045f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) minLength=2; 1046f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1047f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) minLength=1; 1048f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1049f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1050f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 1051f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the trie enumeration is almost the same as 1052f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * in MBCSGetUnicodeSet() for MBCS_OUTPUT_1 1053f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 1054f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(st1=0; st1<stage1Length; ++st1) { 1055f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) st2=stage12[st1]; 1056f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(st2>stage1Length) { 
1057f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ps2=stage12+st2; 1058f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) for(st2=0; st2<64; ++st2) { 1059f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if((st3=(int32_t)ps2[st2]<<UCNV_EXT_STAGE_2_LEFT_SHIFT)!=0) { 1060f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* read the stage 3 block */ 1061f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ps3=stage3+st3; 1062f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1063f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 1064f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Add code points for which the roundtrip flag is set. 1065f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Do not add <subchar1> entries or other (future?) pseudo-entries 1066f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * with an output length of 0, or entries with reserved bits set. 1067f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Recurse for partial results. 
1068f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 1069f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) do { 1070f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) value=stage3b[*ps3++]; 1071f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(value==0) { 1072f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* no mapping, do nothing */ 1073f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { 1074f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) length=0; 1075f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) U16_APPEND_UNSAFE(s, length, c); 1076f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ucnv_extGetUnicodeSetString( 1077f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sharedData, cx, sa, useFallback, minLength, 1078f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c, s, length, 1079f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), 1080f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) pErrorCode); 1081f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else if((useFallback ? 
1082f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 : 1083f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))== 1084f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) && 1085f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength 1086f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ) { 1087f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) switch(filter) { 1088f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case UCNV_SET_FILTER_2022_CN: 1089f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==3 && UCNV_EXT_FROM_U_GET_DATA(value)<=0x82ffff)) { 1090f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 1091f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1092f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1093f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case UCNV_SET_FILTER_SJIS: 1094f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && (value=UCNV_EXT_FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)) { 1095f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 1096f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1097f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1098f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case UCNV_SET_FILTER_GR94DBCS: 1099f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && 1100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfefe - 0xa1a1) && 1101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 
(uint8_t)(value-0xa1)<=(0xfe - 0xa1))) { 1102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 1103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) case UCNV_SET_FILTER_HZ: 1106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && 1107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfdfe - 0xa1a1) && 1108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) { 1109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) continue; 1110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) default: 1113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) /* 1114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * UCNV_SET_FILTER_NONE, 1115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength 1116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 1117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 1118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) sa->add(sa->set, c); 1120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } while((++c&0xf)!=0); 1122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c+=16; /* empty stage 3 block */ 1124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 
1125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 1127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) c+=1024; /* empty stage 2 block */ 1128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 1130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 1131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 1132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION */ 1133