/*
******************************************************************************
*
*   Copyright (C) 2003-2009, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  ucnv_ext.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2003jun13
*   created by: Markus W. Scherer
*
*   Conversion extensions
*/
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uset.h"
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_bld.h"
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_cnv.h"
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_ext.h"
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h"
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/* to Unicode --------------------------------------------------------------- */
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return lookup value for the byte, if found; else 0
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic U_INLINE uint32_t
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extFindToU(const uint32_t *toUSection, int32_t length, uint8_t byte) {
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t word0, word;
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t i, start, limit;
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* check the input byte against the lowest and highest section bytes */
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    start=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[0]);
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    limit=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[length-1]);
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(byte<start || limit<byte) {
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0; /* the byte is out of range */
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(length==((limit-start)+1)) {
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* direct access on a linear array */
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return UCNV_EXT_TO_U_GET_VALUE(toUSection[byte-start]); /* could be 0 */
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* word0 is suitable for <=toUSection[] comparison, word for <toUSection[] */
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    word0=UCNV_EXT_TO_U_MAKE_WORD(byte, 0);
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Shift byte once instead of each section word and add 0xffffff.
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * We will compare the shifted/added byte (bbffffff) against
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * section words which have byte values in the same bit position.
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * If and only if byte bb < section byte ss then bbffffff<ssvvvvvv
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * for all v=0..f
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * so we need not mask off the lower 24 bits of each section word.
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    word=word0|UCNV_EXT_TO_U_VALUE_MASK;
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* binary search */
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    start=0;
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    limit=length;
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(;;) {
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        i=limit-start;
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(i<=1) {
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break; /* done */
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* start<limit-1 */
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(i<=4) {
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* linear search for the last part */
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(word0<=toUSection[start]) {
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(++start<limit && word0<=toUSection[start]) {
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(++start<limit && word0<=toUSection[start]) {
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* always break at start==limit-1 */
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ++start;
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        i=(start+limit)/2;
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(word<toUSection[i]) {
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            limit=i;
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            start=i;
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* did we really find it? */
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(start<limit && byte==UCNV_EXT_TO_U_GET_BYTE(word=toUSection[start])) {
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return UCNV_EXT_TO_U_GET_VALUE(word); /* never 0 */
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0; /* not found */
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * TRUE if not an SI/SO stateful converter (sisoState<0),
 * or if the match length fits with the current converter state:
 * a match of exactly 1 byte when sisoState==0, any other length otherwise.
 *
 * Both arguments are fully parenthesized so that any expression may be
 * passed for match; sisoState is evaluated up to twice.
 */
#define UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, match) \
    ((sisoState)<0 || ((sisoState)==0) == ((match)==1))
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * this works like ucnv_extMatchFromU() except
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - the first character is in pre
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - no trie is used
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - the returned matchLength is not offset by 2
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extMatchToU(const int32_t *cx, int8_t sisoState,
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 const char *pre, int32_t preLength,
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 const char *src, int32_t srcLength,
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 uint32_t *pMatchValue,
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 UBool useFallback, UBool flush) {
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint32_t *toUTable, *toUSection;
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t value, matchValue;
12885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t i, j, idx, length, matchLength;
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t b;
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(cx==NULL || cx[UCNV_EXT_TO_U_LENGTH]<=0) {
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0; /* no extension data, no match */
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* initialize */
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    toUTable=UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_INDEX, uint32_t);
13785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    idx=0;
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    matchValue=0;
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    i=j=matchLength=0;
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(sisoState==0) {
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* SBCS state of an SI/SO stateful converter, look at only exactly 1 byte */
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(preLength>1) {
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return 0; /* no match of a DBCS sequence in SBCS mode */
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(preLength==1) {
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            srcLength=0;
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else /* preLength==0 */ {
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(srcLength>1) {
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                srcLength=1;
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        flush=TRUE;
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* we must not remember fallback matches when not using fallbacks */
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* match input units until there is a full match or the input is consumed */
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(;;) {
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* go to the next section */
16185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        toUSection=toUTable+idx;
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* read first pair of the section */
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        value=*toUSection++;
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        length=UCNV_EXT_TO_U_GET_BYTE(value);
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        value=UCNV_EXT_TO_U_GET_VALUE(value);
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if( value!=0 &&
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) ||
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             TO_U_USE_FALLBACK(useFallback)) &&
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j)
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ) {
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* remember longest match so far */
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            matchValue=value;
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            matchLength=i+j;
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* match pre[] then src[] */
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(i<preLength) {
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            b=(uint8_t)pre[i++];
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(j<srcLength) {
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            b=(uint8_t)src[j++];
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* all input consumed, partial match */
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(flush || (length=(i+j))>UCNV_EXT_MAX_BYTES) {
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /*
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 * end of the entire input stream, stop with the longest match so far
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 * or: partial match must not be longer than UCNV_EXT_MAX_BYTES
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 * because it must fit into state buffers
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 */
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* continue with more input next time */
193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return -length;
194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* search for the current UChar */
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        value=ucnv_extFindToU(toUSection, length, b);
199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(value==0) {
200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* no match here, stop with the longest match so far */
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(UCNV_EXT_TO_U_IS_PARTIAL(value)) {
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* partial match, continue */
20585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                idx=(int32_t)UCNV_EXT_TO_U_GET_PARTIAL_INDEX(value);
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if( (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) ||
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     TO_U_USE_FALLBACK(useFallback)) &&
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j)
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ) {
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* full match, stop with result */
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    matchValue=value;
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    matchLength=i+j;
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* full match on fallback not taken, stop with the longest match so far */
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(matchLength==0) {
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* no match at all */
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* return result */
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    *pMatchValue=UCNV_EXT_TO_U_MASK_ROUNDTRIP(matchValue);
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return matchLength;
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic U_INLINE void
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extWriteToU(UConverter *cnv, const int32_t *cx,
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 uint32_t value,
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 UChar **target, const UChar *targetLimit,
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 int32_t **offsets, int32_t srcIndex,
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 UErrorCode *pErrorCode) {
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* output the result */
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) {
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* output a single code point */
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_toUWriteCodePoint(
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv, UCNV_EXT_TO_U_GET_CODE_POINT(value),
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            target, targetLimit,
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            offsets, srcIndex,
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            pErrorCode);
246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* output a string - with correct data we have resultLength>0 */
248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_toUWriteUChars(
249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv,
250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_UCHARS_INDEX, UChar)+
251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                UCNV_EXT_TO_U_GET_INDEX(value),
252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UCNV_EXT_TO_U_GET_LENGTH(value),
253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            target, targetLimit,
254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            offsets, srcIndex,
255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            pErrorCode);
256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * SI/SO toU state of a converter:
 * - outputType MBCS_OUTPUT_2_SISO: the converter mode
 *   (state 0 is for SBCS, 1 for DBCS)
 * - outputType MBCS_OUTPUT_DBCS_ONLY: always 1
 * - any other outputType: -1, the converter is not SI/SO stateful
 *
 * Note: For SI/SO stateful converters getting here,
 * cnv->mode==0 is equivalent to firstLength==1.
 */
#define UCNV_SISO_STATE(cnv) \
    ((cnv)->sharedData->mbcs.outputType!=MBCS_OUTPUT_2_SISO ? \
        ((cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? 1 : -1) : \
        (int8_t)(cnv)->mode)
270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * target<targetLimit; set error code for overflow
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC UBool
275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extInitialMatchToU(UConverter *cnv, const int32_t *cx,
276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        int32_t firstLength,
277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        const char **src, const char *srcLimit,
278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        UChar **target, const UChar *targetLimit,
279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        int32_t **offsets, int32_t srcIndex,
280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        UBool flush,
281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        UErrorCode *pErrorCode) {
28285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uint32_t value = 0;  /* initialize output-only param to 0 to silence gcc */
283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t match;
284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* try to match */
286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    match=ucnv_extMatchToU(cx, (int8_t)UCNV_SISO_STATE(cnv),
287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           (const char *)cnv->toUBytes, firstLength,
288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           *src, (int32_t)(srcLimit-*src),
289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           &value,
290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           cnv->useFallback, flush);
291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(match>0) {
292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* advance src pointer for the consumed input */
293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *src+=match-firstLength;
294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* write result to target */
296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_extWriteToU(cnv, cx,
297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         value,
298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         target, targetLimit,
299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         offsets, srcIndex,
300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         pErrorCode);
301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(match<0) {
303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* save state for partial match */
304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        const char *s;
305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t j;
306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* copy the first code point */
308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s=(const char *)cnv->toUBytes;
309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->preToUFirstLength=(int8_t)firstLength;
310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for(j=0; j<firstLength; ++j) {
311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->preToU[j]=*s++;
312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* now copy the newly consumed input */
315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s=*src;
316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        match=-match;
317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for(; j<match; ++j) {
318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->preToU[j]=*s++;
319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *src=s; /* same as *src=srcLimit; because we reached the end of input */
321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->preToULength=(int8_t)match;
322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else /* match==0 no match */ {
324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return FALSE;
325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC UChar32
329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extSimpleMatchToU(const int32_t *cx,
330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                       const char *source, int32_t length,
331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                       UBool useFallback) {
33285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uint32_t value = 0;  /* initialize output-only param to 0 to silence gcc */
333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t match;
334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(length<=0) {
336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0xffff;
337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* try to match */
340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    match=ucnv_extMatchToU(cx, -1,
341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           source, length,
342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           NULL, 0,
343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           &value,
344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           useFallback, TRUE);
345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(match==length) {
346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* write result for simple, single-character conversion */
347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) {
348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return UCNV_EXT_TO_U_GET_CODE_POINT(value);
349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * return no match because
354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - match>0 && value points to string: simple conversion cannot handle multiple code points
355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - match>0 && match!=length: not all input consumed, forbidden for this function
356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - match==0: no match found in the first place
357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - match<0: partial match, not supported for simple conversion (and flush==TRUE)
358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return 0xfffe;
360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * continue partial match with new input
364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * never called for simple, single-character conversion
365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void
367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extContinueMatchToU(UConverter *cnv,
368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         UConverterToUnicodeArgs *pArgs, int32_t srcIndex,
369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         UErrorCode *pErrorCode) {
37085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uint32_t value = 0;  /* initialize output-only param to 0 to silence gcc */
371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t match, length;
372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    match=ucnv_extMatchToU(cnv->sharedData->mbcs.extIndexes, (int8_t)UCNV_SISO_STATE(cnv),
374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           cnv->preToU, cnv->preToULength,
375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source),
376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           &value,
377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           cnv->useFallback, pArgs->flush);
378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(match>0) {
379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(match>=cnv->preToULength) {
380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* advance src pointer for the consumed input */
381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            pArgs->source+=match-cnv->preToULength;
382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->preToULength=0;
383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* the match did not use all of preToU[] - keep the rest for replay */
385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            length=cnv->preToULength-match;
386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            uprv_memmove(cnv->preToU, cnv->preToU+match, length);
387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->preToULength=(int8_t)-length;
388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* write result */
391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_extWriteToU(cnv, cnv->sharedData->mbcs.extIndexes,
392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         value,
393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         &pArgs->target, pArgs->targetLimit,
394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         &pArgs->offsets, srcIndex,
395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         pErrorCode);
396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(match<0) {
397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* save state for partial match */
398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        const char *s;
399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t j;
400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* just _append_ the newly consumed input to preToU[] */
402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s=pArgs->source;
403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        match=-match;
404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for(j=cnv->preToULength; j<match; ++j) {
405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->preToU[j]=*s++;
406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */
408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->preToULength=(int8_t)match;
409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else /* match==0 */ {
410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /*
411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * no match
412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *
413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * We need to split the previous input into two parts:
414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *
415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * 1. The first codepage character is unmappable - that's how we got into
416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    trying the extension data in the first place.
417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    We need to move it from the preToU buffer
418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    to the error buffer, set an error code,
419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    and prepare the rest of the previous input for 2.
420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *
421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * 2. The rest of the previous input must be converted once we
422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    come back from the callback for the first character.
423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    At that time, we have to try again from scratch to convert
424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    these input characters.
425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    The replay will be handled by the ucnv.c conversion code.
426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         */
427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* move the first codepage character to the error field */
429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uprv_memcpy(cnv->toUBytes, cnv->preToU, cnv->preToUFirstLength);
430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->toULength=cnv->preToUFirstLength;
431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* move the rest up inside the buffer */
433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        length=cnv->preToULength-cnv->preToUFirstLength;
434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(length>0) {
435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            uprv_memmove(cnv->preToU, cnv->preToU+cnv->preToUFirstLength, length);
436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* mark preToU for replay */
439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->preToULength=(int8_t)-length;
440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* set the error code for unassigned */
442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pErrorCode=U_INVALID_CHAR_FOUND;
443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* from Unicode ------------------------------------------------------------- */
447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return index of the UChar, if found; else <0
450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic U_INLINE int32_t
452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extFindFromU(const UChar *fromUSection, int32_t length, UChar u) {
453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t i, start, limit;
454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* binary search */
456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    start=0;
457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    limit=length;
458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(;;) {
459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        i=limit-start;
460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(i<=1) {
461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break; /* done */
462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* start<limit-1 */
464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(i<=4) {
466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* linear search for the last part */
467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(u<=fromUSection[start]) {
468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(++start<limit && u<=fromUSection[start]) {
471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(++start<limit && u<=fromUSection[start]) {
474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* always break at start==limit-1 */
477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ++start;
478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        i=(start+limit)/2;
482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(u<fromUSection[i]) {
483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            limit=i;
484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            start=i;
486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* did we really find it? */
490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(start<limit && u==fromUSection[start]) {
491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return start;
492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return -1; /* not found */
494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param cx pointer to extension data; if NULL, returns 0
499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param firstCP the first code point before all the other UChars
500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param pre UChars that must match; !initialMatch: partial match with them
501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param preLength length of pre, >=0
502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param src UChars that can be used to complete a match
503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param srcLength length of src, >=0
504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param pMatchValue [out] output result value for the match from the data structure
505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param useFallback "use fallback" flag, usually from cnv->useFallback
506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param flush TRUE if the end of the input stream is reached
507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return >1: matched, return value=total match length (number of input units matched)
508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *          1: matched, no mapping but request for <subchar1>
509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *             (only for the first code point)
510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *          0: no match
511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *         <0: partial match, return value=negative total match length
512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *             (partial matches are never returned for flush==TRUE)
513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *             (partial matches are never returned as being longer than UCNV_EXT_MAX_UCHARS)
514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *         the matchLength is 2 if only firstCP matched, and >2 if firstCP and
515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *         further code units matched
516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t
518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extMatchFromU(const int32_t *cx,
519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   UChar32 firstCP,
520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   const UChar *pre, int32_t preLength,
521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   const UChar *src, int32_t srcLength,
522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   uint32_t *pMatchValue,
523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   UBool useFallback, UBool flush) {
524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint16_t *stage12, *stage3;
525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint32_t *stage3b;
526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UChar *fromUTableUChars, *fromUSectionUChars;
528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint32_t *fromUTableValues, *fromUSectionValues;
529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t value, matchValue;
53185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t i, j, idx, length, matchLength;
532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar c;
533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(cx==NULL) {
535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0; /* no extension data, no match */
536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* trie lookup of firstCP */
53985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    idx=firstCP>>10; /* stage 1 index */
54085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(idx>=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]) {
541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0; /* the first code point is outside the trie */
542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t);
545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t);
54685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    idx=UCNV_EXT_FROM_U(stage12, stage3, idx, firstCP);
547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t);
54985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    value=stage3b[idx];
550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(value==0) {
551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Tests for (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0:
556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Do not interpret values with reserved bits used, for forward compatibility,
557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * and do not even remember intermediate results with reserved bits used.
558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(UCNV_EXT_TO_U_IS_PARTIAL(value)) {
561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* partial match, enter the loop below */
56285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value);
563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* initialize */
565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fromUTableUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar);
566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fromUTableValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t);
567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        matchValue=0;
569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        i=j=matchLength=0;
570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* we must not remember fallback matches when not using fallbacks */
572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* match input units until there is a full match or the input is consumed */
574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for(;;) {
575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* go to the next section */
57685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            fromUSectionUChars=fromUTableUChars+idx;
57785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            fromUSectionValues=fromUTableValues+idx;
578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* read first pair of the section */
580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            length=*fromUSectionUChars++;
581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            value=*fromUSectionValues++;
582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if( value!=0 &&
583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 FROM_U_USE_FALLBACK(useFallback, firstCP)) &&
585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0
586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ) {
587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* remember longest match so far */
588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                matchValue=value;
589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                matchLength=2+i+j;
590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* match pre[] then src[] */
593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(i<preLength) {
594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                c=pre[i++];
595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(j<srcLength) {
596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                c=src[j++];
597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* all input consumed, partial match */
599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(flush || (length=(i+j))>UCNV_EXT_MAX_UCHARS) {
600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /*
601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * end of the entire input stream, stop with the longest match so far
602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * or: partial match must not be longer than UCNV_EXT_MAX_UCHARS
603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * because it must fit into state buffers
604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     */
605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* continue with more input next time */
608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    return -(2+length);
609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* search for the current UChar */
61385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            idx=ucnv_extFindFromU(fromUSectionUChars, length, c);
61485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            if(idx<0) {
615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* no match here, stop with the longest match so far */
616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
61885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                value=fromUSectionValues[idx];
619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* partial match, continue */
62185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                    idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value);
622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if( (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         FROM_U_USE_FALLBACK(useFallback, firstCP)) &&
625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0
626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    ) {
627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* full match, stop with result */
628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        matchValue=value;
629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        matchLength=2+i+j;
630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } else {
631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* full match on fallback not taken, stop with the longest match so far */
632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(matchLength==0) {
639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* no match at all */
640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return 0;
641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else /* result from firstCP trie lookup */ {
643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if( (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             FROM_U_USE_FALLBACK(useFallback, firstCP)) &&
645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0
646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ) {
647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* full match, stop with result */
648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            matchValue=value;
649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            matchLength=2;
650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* fallback not taken */
652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return 0;
653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* return result */
657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(matchValue==UCNV_EXT_FROM_U_SUBCHAR1) {
658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 1; /* assert matchLength==2 */
659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    *pMatchValue=matchValue;
662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return matchLength;
663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param value fromUnicode mapping table value; ignores roundtrip and reserved bits
667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic U_INLINE void
669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extWriteFromU(UConverter *cnv, const int32_t *cx,
670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   uint32_t value,
671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   char **target, const char *targetLimit,
672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   int32_t **offsets, int32_t srcIndex,
673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   UErrorCode *pErrorCode) {
674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t buffer[1+UCNV_EXT_MAX_BYTES];
675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint8_t *result;
676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t length, prevLength;
677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    length=UCNV_EXT_FROM_U_GET_LENGTH(value);
679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value);
680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* output the result */
682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) {
683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /*
684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * Generate a byte array and then write it below.
685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * This is not the fastest possible way, but it should be ok for
686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * extension mappings, and it is much simpler.
687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * Offset and overflow handling are only done once this way.
688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         */
689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uint8_t *p=buffer+1; /* reserve buffer[0] for shiftByte below */
690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        switch(length) {
691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 3:
692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *p++=(uint8_t)(value>>16);
693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 2:
694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *p++=(uint8_t)(value>>8);
695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 1:
696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *p++=(uint8_t)value;
697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        default:
698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break; /* will never occur */
699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        result=buffer+1;
701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value;
703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* with correct data we have length>0 */
706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if((prevLength=cnv->fromUnicodeStatus)!=0) {
708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* handle SI/SO stateful output */
709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uint8_t shiftByte;
710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(prevLength>1 && length==1) {
712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* change from double-byte mode to single-byte */
713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            shiftByte=(uint8_t)UCNV_SI;
714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->fromUnicodeStatus=1;
715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(prevLength==1 && length>1) {
716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* change from single-byte mode to double-byte */
717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            shiftByte=(uint8_t)UCNV_SO;
718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->fromUnicodeStatus=2;
719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            shiftByte=0;
721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(shiftByte!=0) {
724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* prepend the shift byte to the result bytes */
725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            buffer[0]=shiftByte;
726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(result!=buffer+1) {
727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                uprv_memcpy(buffer+1, result, length);
728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result=buffer;
730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ++length;
731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_fromUWriteBytes(cnv, (const char *)result, length,
735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         target, targetLimit,
736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         offsets, srcIndex,
737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         pErrorCode);
738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * target<targetLimit; set error code for overflow
742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC UBool
744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extInitialMatchFromU(UConverter *cnv, const int32_t *cx,
745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                          UChar32 cp,
746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                          const UChar **src, const UChar *srcLimit,
747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                          char **target, const char *targetLimit,
748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                          int32_t **offsets, int32_t srcIndex,
749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                          UBool flush,
750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                          UErrorCode *pErrorCode) {
75185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uint32_t value = 0;  /* initialize output-only param to 0 to silence gcc */
752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t match;
753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* try to match */
755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    match=ucnv_extMatchFromU(cx, cp,
756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             NULL, 0,
757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             *src, (int32_t)(srcLimit-*src),
758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             &value,
759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             cnv->useFallback, flush);
760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* reject a match if the result is a single byte for DBCS-only */
762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if( match>=2 &&
763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        !(UCNV_EXT_FROM_U_GET_LENGTH(value)==1 &&
764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          cnv->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY)
765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ) {
766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* advance src pointer for the consumed input */
767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *src+=match-2; /* remove 2 for the initial code point */
768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* write result to target */
770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_extWriteFromU(cnv, cx,
771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           value,
772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           target, targetLimit,
773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           offsets, srcIndex,
774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           pErrorCode);
775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(match<0) {
777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* save state for partial match */
778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        const UChar *s;
779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t j;
780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* copy the first code point */
782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->preFromUFirstCP=cp;
783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* now copy the newly consumed input */
785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s=*src;
786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        match=-match-2; /* remove 2 for the initial code point */
787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for(j=0; j<match; ++j) {
788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->preFromU[j]=*s++;
789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *src=s; /* same as *src=srcLimit; because we reached the end of input */
791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->preFromULength=(int8_t)match;
792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(match==1) {
794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* matched, no mapping but request for <subchar1> */
795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->useSubChar1=TRUE;
796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return FALSE;
797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else /* match==0 no match */ {
798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return FALSE;
799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Used by ISO 2022 implementation.
804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return number of bytes in *pValue; negative number if fallback; 0 for no mapping
805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC int32_t
807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extSimpleMatchFromU(const int32_t *cx,
808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         UChar32 cp, uint32_t *pValue,
809ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         UBool useFallback) {
810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t value;
811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t match;
812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* try to match */
814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    match=ucnv_extMatchFromU(cx,
815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             cp,
816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             NULL, 0,
817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             NULL, 0,
818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             &value,
819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             useFallback, TRUE);
820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(match>=2) {
821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* write result for simple, single-character conversion */
822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t length;
823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int isRoundtrip;
824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
825ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        isRoundtrip=UCNV_EXT_FROM_U_IS_ROUNDTRIP(value);
826ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        length=UCNV_EXT_FROM_U_GET_LENGTH(value);
827ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value);
828ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) {
830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *pValue=value;
831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return isRoundtrip ? length : -length;
832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if 0 /* not currently used */
833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(length==4) {
834ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* de-serialize a 4-byte result */
835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            const uint8_t *result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value;
836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *pValue=
837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ((uint32_t)result[0]<<24)|
838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ((uint32_t)result[1]<<16)|
839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ((uint32_t)result[2]<<8)|
840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                result[3];
841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return isRoundtrip ? 4 : -4;
842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * return no match because
848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - match>1 && resultLength>4: result too long for simple conversion
849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - match==1: no match found, <subchar1> preferred
850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - match==0: no match found in the first place
851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - match<0: partial match, not supported for simple conversion (and flush==TRUE)
852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return 0;
854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * continue partial match with new input, requires cnv->preFromUFirstCP>=0
858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * never called for simple, single-character conversion
859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void
861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extContinueMatchFromU(UConverter *cnv,
862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           UConverterFromUnicodeArgs *pArgs, int32_t srcIndex,
863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           UErrorCode *pErrorCode) {
86485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uint32_t value = 0;  /* initialize output-only param to 0 to silence gcc */
865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t match;
866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    match=ucnv_extMatchFromU(cnv->sharedData->mbcs.extIndexes,
868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             cnv->preFromUFirstCP,
869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             cnv->preFromU, cnv->preFromULength,
870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source),
871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             &value,
872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             cnv->useFallback, pArgs->flush);
873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(match>=2) {
874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        match-=2; /* remove 2 for the initial code point */
875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(match>=cnv->preFromULength) {
877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* advance src pointer for the consumed input */
878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            pArgs->source+=match-cnv->preFromULength;
879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->preFromULength=0;
880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* the match did not use all of preFromU[] - keep the rest for replay */
882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            int32_t length=cnv->preFromULength-match;
883ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            uprv_memmove(cnv->preFromU, cnv->preFromU+match, length*U_SIZEOF_UCHAR);
884ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->preFromULength=(int8_t)-length;
885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* finish the partial match */
888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->preFromUFirstCP=U_SENTINEL;
889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* write result */
891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_extWriteFromU(cnv, cnv->sharedData->mbcs.extIndexes,
892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           value,
893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           &pArgs->target, pArgs->targetLimit,
894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           &pArgs->offsets, srcIndex,
895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           pErrorCode);
896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(match<0) {
897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* save state for partial match */
898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        const UChar *s;
899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t j;
900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* just _append_ the newly consumed input to preFromU[] */
902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s=pArgs->source;
903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        match=-match-2; /* remove 2 for the initial code point */
904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for(j=cnv->preFromULength; j<match; ++j) {
905ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->preFromU[j]=*s++;
906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */
908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->preFromULength=(int8_t)match;
909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else /* match==0 or 1 */ {
910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /*
911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * no match
912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *
913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * We need to split the previous input into two parts:
914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *
915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * 1. The first code point is unmappable - that's how we got into
916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    trying the extension data in the first place.
917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    We need to move it from the preFromU buffer
918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    to the error buffer, set an error code,
919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    and prepare the rest of the previous input for 2.
920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *
921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * 2. The rest of the previous input must be converted once we
922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    come back from the callback for the first code point.
923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    At that time, we have to try again from scratch to convert
924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    these input characters.
925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    The replay will be handled by the ucnv.c conversion code.
926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         */
927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(match==1) {
929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* matched, no mapping but request for <subchar1> */
930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->useSubChar1=TRUE;
931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* move the first code point to the error field */
934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->fromUChar32=cnv->preFromUFirstCP;
935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->preFromUFirstCP=U_SENTINEL;
936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* mark preFromU for replay */
938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->preFromULength=-cnv->preFromULength;
939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* set the error code for unassigned */
941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pErrorCode=U_INVALID_CHAR_FOUND;
942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
946ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData,
947ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            const int32_t *cx,
948ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            const USetAdder *sa,
949c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                            UBool useFallback,
950ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            int32_t minLength,
951ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            UChar32 c,
952ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            UChar s[UCNV_EXT_MAX_UCHARS], int32_t length,
953ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            int32_t sectionIndex,
954ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            UErrorCode *pErrorCode) {
955ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UChar *fromUSectionUChars;
956ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint32_t *fromUSectionValues;
957ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
958ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t value;
959ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t i, count;
960ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
961ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fromUSectionUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar)+sectionIndex;
962ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fromUSectionValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t)+sectionIndex;
963ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
964ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* read first pair of the section */
965ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    count=*fromUSectionUChars++;
966ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    value=*fromUSectionValues++;
967ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
968ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if( value!=0 &&
969c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) || useFallback) &&
970ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ) {
972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(c>=0) {
973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* add the initial code point */
974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            sa->add(sa->set, c);
975ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
976ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* add the string so far */
977ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            sa->addString(sa->set, s, length);
978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(i=0; i<count; ++i) {
982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* append this code unit and recurse or add the string */
983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s[length]=fromUSectionUChars[i];
984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        value=fromUSectionValues[i];
985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(value==0) {
987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* no mapping, do nothing */
988ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
989ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ucnv_extGetUnicodeSetString(
990c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                sharedData, cx, sa, useFallback, minLength,
991ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                U_SENTINEL, s, length+1,
992ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                pErrorCode);
994c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        } else if((useFallback ?
995c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                      (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 :
996c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                      ((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
997c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                          UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) &&
998ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                  UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
999ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ) {
1000ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            sa->addString(sa->set, s, length+1);
1001ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1002ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1003ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
1004ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1005ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void
1006ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
1007ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                      const USetAdder *sa,
1008ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                      UConverterUnicodeSet which,
1009c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                      UConverterSetFilter filter,
1010ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                      UErrorCode *pErrorCode) {
1011ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const int32_t *cx;
1012ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint16_t *stage12, *stage3, *ps2, *ps3;
1013ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint32_t *stage3b;
1014ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1015ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t value;
1016ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t st1, stage1Length, st2, st3, minLength;
1017c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    UBool useFallback;
1018ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1019ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar s[UCNV_EXT_MAX_UCHARS];
1020ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 c;
1021ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t length;
1022ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1023ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cx=sharedData->mbcs.extIndexes;
1024ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(cx==NULL) {
1025ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
1026ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1027ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1028ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t);
1029ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t);
1030ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t);
1031ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1032ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stage1Length=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH];
1033ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1034c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET);
1035c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru
1036ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* enumerate the from-Unicode trie table */
1037ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    c=0; /* keep track of the current code point while enumerating */
1038ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1039c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    if(filter==UCNV_SET_FILTER_2022_CN) {
1040c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        minLength=3;
1041c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    } else if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ||
1042c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru               filter!=UCNV_SET_FILTER_NONE
1043c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    ) {
1044ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* DBCS-only, ignore single-byte results */
1045ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        minLength=2;
1046ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
1047ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        minLength=1;
1048ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1049ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1050ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
1051ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * the trie enumeration is almost the same as
1052ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * in MBCSGetUnicodeSet() for MBCS_OUTPUT_1
1053ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
1054ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(st1=0; st1<stage1Length; ++st1) {
1055ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        st2=stage12[st1];
1056ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(st2>stage1Length) {
1057ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ps2=stage12+st2;
1058ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            for(st2=0; st2<64; ++st2) {
1059ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if((st3=(int32_t)ps2[st2]<<UCNV_EXT_STAGE_2_LEFT_SHIFT)!=0) {
1060ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* read the stage 3 block */
1061ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    ps3=stage3+st3;
1062ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1063ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /*
1064ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * Add code points for which the roundtrip flag is set.
1065ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * Do not add <subchar1> entries or other (future?) pseudo-entries
1066ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * with an output length of 0, or entries with reserved bits set.
1067ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * Recurse for partial results.
1068ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     */
1069ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    do {
1070ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        value=stage3b[*ps3++];
1071ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        if(value==0) {
1072ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            /* no mapping, do nothing */
1073ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
1074ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            length=0;
1075ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            U16_APPEND_UNSAFE(s, length, c);
1076ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            ucnv_extGetUnicodeSetString(
1077c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                sharedData, cx, sa, useFallback, minLength,
1078ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                c, s, length,
1079ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
1080ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                pErrorCode);
1081c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                        } else if((useFallback ?
1082c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                      (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 :
1083c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                      ((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
1084c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                          UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) &&
1085ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                  UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
1086ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        ) {
1087c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                            switch(filter) {
1088c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                            case UCNV_SET_FILTER_2022_CN:
1089c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==3 && UCNV_EXT_FROM_U_GET_DATA(value)<=0x82ffff)) {
1090c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                    continue;
1091c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                }
1092c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                break;
1093c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                            case UCNV_SET_FILTER_SJIS:
1094c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && (value=UCNV_EXT_FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)) {
1095c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                    continue;
1096c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                }
1097c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                break;
1098c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                            case UCNV_SET_FILTER_GR94DBCS:
1099c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 &&
1100c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                     (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfefe - 0xa1a1) &&
1101c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                     (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) {
1102c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                    continue;
1103c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                }
1104c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                break;
1105c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                            case UCNV_SET_FILTER_HZ:
1106c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 &&
1107c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                     (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfdfe - 0xa1a1) &&
1108c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                     (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) {
1109c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                    continue;
1110c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                }
1111c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                break;
1112c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                            default:
1113c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                /*
1114c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                 * UCNV_SET_FILTER_NONE,
1115c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                 * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength
1116c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                 */
1117c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                break;
1118c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                            }
1119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            sa->add(sa->set, c);
1120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        }
1121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } while((++c&0xf)!=0);
1122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
1123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c+=16; /* empty stage 3 block */
1124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
1127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c+=1024; /* empty stage 2 block */
1128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
1131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
1133