1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************
3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
48393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius*   Copyright (C) 2003-2013, International Business Machines
5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************
8103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius*   file name:  ucnv_ext.cpp
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   encoding:   US-ASCII
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   tab size:   8 (not used)
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   indentation:4
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   created on: 2003jun13
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   created by: Markus W. Scherer
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Conversion extensions
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uset.h"
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_bld.h"
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_cnv.h"
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucnv_ext.h"
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h"
28103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "uassert.h"
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* to Unicode --------------------------------------------------------------- */
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return lookup value for the byte, if found; else 0
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
35103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic inline uint32_t
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extFindToU(const uint32_t *toUSection, int32_t length, uint8_t byte) {
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t word0, word;
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t i, start, limit;
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* check the input byte against the lowest and highest section bytes */
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    start=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[0]);
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    limit=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[length-1]);
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(byte<start || limit<byte) {
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0; /* the byte is out of range */
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(length==((limit-start)+1)) {
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* direct access on a linear array */
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return UCNV_EXT_TO_U_GET_VALUE(toUSection[byte-start]); /* could be 0 */
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* word0 is suitable for <=toUSection[] comparison, word for <toUSection[] */
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    word0=UCNV_EXT_TO_U_MAKE_WORD(byte, 0);
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Shift byte once instead of each section word and add 0xffffff.
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * We will compare the shifted/added byte (bbffffff) against
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * section words which have byte values in the same bit position.
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * If and only if byte bb < section byte ss then bbffffff<ssvvvvvv
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * for all v=0..f
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * so we need not mask off the lower 24 bits of each section word.
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    word=word0|UCNV_EXT_TO_U_VALUE_MASK;
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* binary search */
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    start=0;
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    limit=length;
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(;;) {
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        i=limit-start;
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(i<=1) {
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break; /* done */
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* start<limit-1 */
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(i<=4) {
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* linear search for the last part */
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(word0<=toUSection[start]) {
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(++start<limit && word0<=toUSection[start]) {
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(++start<limit && word0<=toUSection[start]) {
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* always break at start==limit-1 */
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ++start;
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        i=(start+limit)/2;
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(word<toUSection[i]) {
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            limit=i;
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            start=i;
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* did we really find it? */
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(start<limit && byte==UCNV_EXT_TO_U_GET_BYTE(word=toUSection[start])) {
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return UCNV_EXT_TO_U_GET_VALUE(word); /* never 0 */
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0; /* not found */
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * TRUE if not an SI/SO stateful converter (sisoState<0),
 * or if the match length fits with the current converter state:
 * - sisoState==0: only a match length of exactly 1 fits
 * - any other non-negative sisoState: only a match length other than 1 fits
 *
 * Both parameters are fully parenthesized so that an argument expression
 * containing an operator with lower precedence than == (for example a|b)
 * is compared as a whole.
 * Note that sisoState may be evaluated more than once.
 */
#define UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, match) \
    ((sisoState)<0 || ((sisoState)==0) == ((match)==1))
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * this works like ucnv_extMatchFromU() except
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - the first character is in pre
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - no trie is used
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - the returned matchLength is not offset by 2
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extMatchToU(const int32_t *cx, int8_t sisoState,
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 const char *pre, int32_t preLength,
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 const char *src, int32_t srcLength,
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 uint32_t *pMatchValue,
125103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                 UBool /*useFallback*/, UBool flush) {
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint32_t *toUTable, *toUSection;
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t value, matchValue;
12985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t i, j, idx, length, matchLength;
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t b;
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(cx==NULL || cx[UCNV_EXT_TO_U_LENGTH]<=0) {
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0; /* no extension data, no match */
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* initialize */
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    toUTable=UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_INDEX, uint32_t);
13885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    idx=0;
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    matchValue=0;
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    i=j=matchLength=0;
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(sisoState==0) {
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* SBCS state of an SI/SO stateful converter, look at only exactly 1 byte */
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(preLength>1) {
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return 0; /* no match of a DBCS sequence in SBCS mode */
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(preLength==1) {
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            srcLength=0;
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else /* preLength==0 */ {
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(srcLength>1) {
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                srcLength=1;
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        flush=TRUE;
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* we must not remember fallback matches when not using fallbacks */
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* match input units until there is a full match or the input is consumed */
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(;;) {
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* go to the next section */
16285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        toUSection=toUTable+idx;
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* read first pair of the section */
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        value=*toUSection++;
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        length=UCNV_EXT_TO_U_GET_BYTE(value);
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        value=UCNV_EXT_TO_U_GET_VALUE(value);
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if( value!=0 &&
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) ||
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             TO_U_USE_FALLBACK(useFallback)) &&
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j)
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ) {
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* remember longest match so far */
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            matchValue=value;
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            matchLength=i+j;
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* match pre[] then src[] */
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(i<preLength) {
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            b=(uint8_t)pre[i++];
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(j<srcLength) {
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            b=(uint8_t)src[j++];
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* all input consumed, partial match */
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(flush || (length=(i+j))>UCNV_EXT_MAX_BYTES) {
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /*
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 * end of the entire input stream, stop with the longest match so far
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 * or: partial match must not be longer than UCNV_EXT_MAX_BYTES
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 * because it must fit into state buffers
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 */
191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* continue with more input next time */
194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return -length;
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* search for the current UChar */
199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        value=ucnv_extFindToU(toUSection, length, b);
200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(value==0) {
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* no match here, stop with the longest match so far */
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(UCNV_EXT_TO_U_IS_PARTIAL(value)) {
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* partial match, continue */
20685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                idx=(int32_t)UCNV_EXT_TO_U_GET_PARTIAL_INDEX(value);
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if( (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) ||
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     TO_U_USE_FALLBACK(useFallback)) &&
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j)
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ) {
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* full match, stop with result */
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    matchValue=value;
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    matchLength=i+j;
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* full match on fallback not taken, stop with the longest match so far */
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(matchLength==0) {
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* no match at all */
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* return result */
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    *pMatchValue=UCNV_EXT_TO_U_MASK_ROUNDTRIP(matchValue);
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return matchLength;
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
233103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic inline void
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extWriteToU(UConverter *cnv, const int32_t *cx,
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 uint32_t value,
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 UChar **target, const UChar *targetLimit,
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 int32_t **offsets, int32_t srcIndex,
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 UErrorCode *pErrorCode) {
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* output the result */
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) {
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* output a single code point */
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_toUWriteCodePoint(
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv, UCNV_EXT_TO_U_GET_CODE_POINT(value),
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            target, targetLimit,
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            offsets, srcIndex,
246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            pErrorCode);
247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* output a string - with correct data we have resultLength>0 */
249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_toUWriteUChars(
250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv,
251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_UCHARS_INDEX, UChar)+
252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                UCNV_EXT_TO_U_GET_INDEX(value),
253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UCNV_EXT_TO_U_GET_LENGTH(value),
254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            target, targetLimit,
255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            offsets, srcIndex,
256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            pErrorCode);
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * Determine the SI/SO toU state of a converter from its output type:
 * - DBCS-only: always 1
 * - SI/SO stateful: the converter mode (state 0 is for SBCS, 1 for DBCS)
 * - anything else: -1, the converter is not SI/SO stateful
 *
 * Note: For SI/SO stateful converters getting here,
 * cnv->mode==0 is equivalent to firstLength==1.
 */
#define UCNV_SISO_STATE(cnv) \
    ((cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? 1 : \
     (cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO ? (int8_t)(cnv)->mode : -1)
271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * target<targetLimit; set error code for overflow
274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC UBool
276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extInitialMatchToU(UConverter *cnv, const int32_t *cx,
277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        int32_t firstLength,
278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        const char **src, const char *srcLimit,
279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        UChar **target, const UChar *targetLimit,
280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        int32_t **offsets, int32_t srcIndex,
281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        UBool flush,
282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        UErrorCode *pErrorCode) {
28385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uint32_t value = 0;  /* initialize output-only param to 0 to silence gcc */
284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t match;
285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* try to match */
287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    match=ucnv_extMatchToU(cx, (int8_t)UCNV_SISO_STATE(cnv),
288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           (const char *)cnv->toUBytes, firstLength,
289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           *src, (int32_t)(srcLimit-*src),
290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           &value,
291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           cnv->useFallback, flush);
292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(match>0) {
293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* advance src pointer for the consumed input */
294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *src+=match-firstLength;
295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* write result to target */
297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_extWriteToU(cnv, cx,
298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         value,
299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         target, targetLimit,
300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         offsets, srcIndex,
301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         pErrorCode);
302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(match<0) {
304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* save state for partial match */
305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        const char *s;
306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t j;
307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* copy the first code point */
309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s=(const char *)cnv->toUBytes;
310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->preToUFirstLength=(int8_t)firstLength;
311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for(j=0; j<firstLength; ++j) {
312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->preToU[j]=*s++;
313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* now copy the newly consumed input */
316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s=*src;
317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        match=-match;
318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for(; j<match; ++j) {
319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->preToU[j]=*s++;
320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *src=s; /* same as *src=srcLimit; because we reached the end of input */
322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->preToULength=(int8_t)match;
323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else /* match==0 no match */ {
325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return FALSE;
326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC UChar32
330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extSimpleMatchToU(const int32_t *cx,
331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                       const char *source, int32_t length,
332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                       UBool useFallback) {
33385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uint32_t value = 0;  /* initialize output-only param to 0 to silence gcc */
334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t match;
335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(length<=0) {
337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0xffff;
338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* try to match */
341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    match=ucnv_extMatchToU(cx, -1,
342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           source, length,
343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           NULL, 0,
344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           &value,
345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           useFallback, TRUE);
346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(match==length) {
347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* write result for simple, single-character conversion */
348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) {
349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return UCNV_EXT_TO_U_GET_CODE_POINT(value);
350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * return no match because
355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - match>0 && value points to string: simple conversion cannot handle multiple code points
356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - match>0 && match!=length: not all input consumed, forbidden for this function
357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - match==0: no match found in the first place
358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - match<0: partial match, not supported for simple conversion (and flush==TRUE)
359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return 0xfffe;
361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * continue partial match with new input
365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * never called for simple, single-character conversion
366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void
368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extContinueMatchToU(UConverter *cnv,
369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         UConverterToUnicodeArgs *pArgs, int32_t srcIndex,
370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         UErrorCode *pErrorCode) {
37185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uint32_t value = 0;  /* initialize output-only param to 0 to silence gcc */
372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t match, length;
373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    match=ucnv_extMatchToU(cnv->sharedData->mbcs.extIndexes, (int8_t)UCNV_SISO_STATE(cnv),
375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           cnv->preToU, cnv->preToULength,
376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source),
377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           &value,
378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           cnv->useFallback, pArgs->flush);
379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(match>0) {
380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(match>=cnv->preToULength) {
381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* advance src pointer for the consumed input */
382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            pArgs->source+=match-cnv->preToULength;
383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->preToULength=0;
384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* the match did not use all of preToU[] - keep the rest for replay */
386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            length=cnv->preToULength-match;
387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            uprv_memmove(cnv->preToU, cnv->preToU+match, length);
388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->preToULength=(int8_t)-length;
389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* write result */
392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_extWriteToU(cnv, cnv->sharedData->mbcs.extIndexes,
393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         value,
394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         &pArgs->target, pArgs->targetLimit,
395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         &pArgs->offsets, srcIndex,
396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         pErrorCode);
397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(match<0) {
398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* save state for partial match */
399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        const char *s;
400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t j;
401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* just _append_ the newly consumed input to preToU[] */
403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s=pArgs->source;
404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        match=-match;
405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for(j=cnv->preToULength; j<match; ++j) {
406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->preToU[j]=*s++;
407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */
409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->preToULength=(int8_t)match;
410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else /* match==0 */ {
411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /*
412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * no match
413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *
414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * We need to split the previous input into two parts:
415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *
416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * 1. The first codepage character is unmappable - that's how we got into
417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    trying the extension data in the first place.
418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    We need to move it from the preToU buffer
419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    to the error buffer, set an error code,
420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    and prepare the rest of the previous input for 2.
421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *
422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * 2. The rest of the previous input must be converted once we
423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    come back from the callback for the first character.
424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    At that time, we have to try again from scratch to convert
425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    these input characters.
426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    The replay will be handled by the ucnv.c conversion code.
427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         */
428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* move the first codepage character to the error field */
430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uprv_memcpy(cnv->toUBytes, cnv->preToU, cnv->preToUFirstLength);
431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->toULength=cnv->preToUFirstLength;
432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* move the rest up inside the buffer */
434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        length=cnv->preToULength-cnv->preToUFirstLength;
435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(length>0) {
436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            uprv_memmove(cnv->preToU, cnv->preToU+cnv->preToUFirstLength, length);
437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* mark preToU for replay */
440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->preToULength=(int8_t)-length;
441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* set the error code for unassigned */
443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pErrorCode=U_INVALID_CHAR_FOUND;
444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* from Unicode ------------------------------------------------------------- */
448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
4498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius// Use roundtrips, "good one-way" mappings, and some normal fallbacks.
4508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusstatic inline UBool
4518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusextFromUUseMapping(UBool useFallback, uint32_t value, UChar32 firstCP) {
4528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return
4538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        ((value&UCNV_EXT_FROM_U_STATUS_MASK)!=0 ||
4548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            FROM_U_USE_FALLBACK(useFallback, firstCP)) &&
4558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0;
4568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
4578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return index of the UChar, if found; else <0
460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
461103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic inline int32_t
462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extFindFromU(const UChar *fromUSection, int32_t length, UChar u) {
463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t i, start, limit;
464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* binary search */
466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    start=0;
467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    limit=length;
468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(;;) {
469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        i=limit-start;
470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(i<=1) {
471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break; /* done */
472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* start<limit-1 */
474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(i<=4) {
476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* linear search for the last part */
477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(u<=fromUSection[start]) {
478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(++start<limit && u<=fromUSection[start]) {
481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(++start<limit && u<=fromUSection[start]) {
484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* always break at start==limit-1 */
487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ++start;
488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        i=(start+limit)/2;
492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(u<fromUSection[i]) {
493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            limit=i;
494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            start=i;
496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* did we really find it? */
500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(start<limit && u==fromUSection[start]) {
501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return start;
502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return -1; /* not found */
504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param cx pointer to extension data; if NULL, returns 0
509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param firstCP the first code point before all the other UChars
510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param pre UChars that must match; !initialMatch: partial match with them
511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param preLength length of pre, >=0
512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param src UChars that can be used to complete a match
513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param srcLength length of src, >=0
514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param pMatchValue [out] output result value for the match from the data structure
515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param useFallback "use fallback" flag, usually from cnv->useFallback
516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param flush TRUE if the end of the input stream is reached
517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return >1: matched, return value=total match length (number of input units matched)
518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *          1: matched, no mapping but request for <subchar1>
519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *             (only for the first code point)
520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *          0: no match
521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *         <0: partial match, return value=negative total match length
522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *             (partial matches are never returned for flush==TRUE)
523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *             (partial matches are never returned as being longer than UCNV_EXT_MAX_UCHARS)
524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *         the matchLength is 2 if only firstCP matched, and >2 if firstCP and
525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *         further code units matched
526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t
528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extMatchFromU(const int32_t *cx,
529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   UChar32 firstCP,
530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   const UChar *pre, int32_t preLength,
531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   const UChar *src, int32_t srcLength,
532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   uint32_t *pMatchValue,
533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   UBool useFallback, UBool flush) {
534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint16_t *stage12, *stage3;
535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint32_t *stage3b;
536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UChar *fromUTableUChars, *fromUSectionUChars;
538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint32_t *fromUTableValues, *fromUSectionValues;
539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t value, matchValue;
54185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t i, j, idx, length, matchLength;
542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar c;
543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(cx==NULL) {
545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0; /* no extension data, no match */
546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* trie lookup of firstCP */
54985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    idx=firstCP>>10; /* stage 1 index */
55085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if(idx>=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]) {
551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0; /* the first code point is outside the trie */
552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t);
555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t);
55685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    idx=UCNV_EXT_FROM_U(stage12, stage3, idx, firstCP);
557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t);
55985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    value=stage3b[idx];
560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(value==0) {
561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 0;
562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Tests for (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0:
566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Do not interpret values with reserved bits used, for forward compatibility,
567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * and do not even remember intermediate results with reserved bits used.
568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(UCNV_EXT_TO_U_IS_PARTIAL(value)) {
571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* partial match, enter the loop below */
57285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value);
573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* initialize */
575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fromUTableUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar);
576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fromUTableValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t);
577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        matchValue=0;
579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        i=j=matchLength=0;
580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* we must not remember fallback matches when not using fallbacks */
582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* match input units until there is a full match or the input is consumed */
584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for(;;) {
585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* go to the next section */
58685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            fromUSectionUChars=fromUTableUChars+idx;
58785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            fromUSectionValues=fromUTableValues+idx;
588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* read first pair of the section */
590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            length=*fromUSectionUChars++;
591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            value=*fromUSectionValues++;
5928393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            if(value!=0 && extFromUUseMapping(useFallback, value, firstCP)) {
593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* remember longest match so far */
594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                matchValue=value;
595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                matchLength=2+i+j;
596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* match pre[] then src[] */
599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(i<preLength) {
600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                c=pre[i++];
601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if(j<srcLength) {
602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                c=src[j++];
603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* all input consumed, partial match */
605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(flush || (length=(i+j))>UCNV_EXT_MAX_UCHARS) {
606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /*
607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * end of the entire input stream, stop with the longest match so far
608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * or: partial match must not be longer than UCNV_EXT_MAX_UCHARS
609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     * because it must fit into state buffers
610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                     */
611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* continue with more input next time */
614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    return -(2+length);
615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* search for the current UChar */
61985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            idx=ucnv_extFindFromU(fromUSectionUChars, length, c);
62085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            if(idx<0) {
621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* no match here, stop with the longest match so far */
622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
62485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                value=fromUSectionValues[idx];
625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* partial match, continue */
62785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho                    idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value);
628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
6298393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                    if(extFromUUseMapping(useFallback, value, firstCP)) {
630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* full match, stop with result */
631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        matchValue=value;
632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        matchLength=2+i+j;
633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } else {
634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* full match on fallback not taken, stop with the longest match so far */
635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    break;
637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(matchLength==0) {
642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* no match at all */
643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return 0;
644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else /* result from firstCP trie lookup */ {
6468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if(extFromUUseMapping(useFallback, value, firstCP)) {
647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* full match, stop with result */
648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            matchValue=value;
649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            matchLength=2;
650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* fallback not taken */
652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return 0;
653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* return result */
657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(matchValue==UCNV_EXT_FROM_U_SUBCHAR1) {
658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return 1; /* assert matchLength==2 */
659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    *pMatchValue=matchValue;
662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return matchLength;
663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param value fromUnicode mapping table value; ignores roundtrip and reserved bits
667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
668103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic inline void
669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extWriteFromU(UConverter *cnv, const int32_t *cx,
670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   uint32_t value,
671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   char **target, const char *targetLimit,
672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   int32_t **offsets, int32_t srcIndex,
673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   UErrorCode *pErrorCode) {
674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint8_t buffer[1+UCNV_EXT_MAX_BYTES];
675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint8_t *result;
676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t length, prevLength;
677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    length=UCNV_EXT_FROM_U_GET_LENGTH(value);
679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value);
680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* output the result */
682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) {
683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /*
684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * Generate a byte array and then write it below.
685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * This is not the fastest possible way, but it should be ok for
686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * extension mappings, and it is much simpler.
687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * Offset and overflow handling are only done once this way.
688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         */
689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uint8_t *p=buffer+1; /* reserve buffer[0] for shiftByte below */
690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        switch(length) {
691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        case 3:
692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *p++=(uint8_t)(value>>16);
693103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case 2: /*fall through*/
694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *p++=(uint8_t)(value>>8);
695103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case 1: /*fall through*/
696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *p++=(uint8_t)value;
697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        default:
698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break; /* will never occur */
699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        result=buffer+1;
701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value;
703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* with correct data we have length>0 */
706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if((prevLength=cnv->fromUnicodeStatus)!=0) {
708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* handle SI/SO stateful output */
709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uint8_t shiftByte;
710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(prevLength>1 && length==1) {
712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* change from double-byte mode to single-byte */
713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            shiftByte=(uint8_t)UCNV_SI;
714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->fromUnicodeStatus=1;
715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(prevLength==1 && length>1) {
716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* change from single-byte mode to double-byte */
717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            shiftByte=(uint8_t)UCNV_SO;
718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->fromUnicodeStatus=2;
719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            shiftByte=0;
721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(shiftByte!=0) {
724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* prepend the shift byte to the result bytes */
725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            buffer[0]=shiftByte;
726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(result!=buffer+1) {
727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                uprv_memcpy(buffer+1, result, length);
728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            result=buffer;
730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ++length;
731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_fromUWriteBytes(cnv, (const char *)result, length,
735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         target, targetLimit,
736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         offsets, srcIndex,
737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         pErrorCode);
738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * target<targetLimit; set error code for overflow
742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC UBool
744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extInitialMatchFromU(UConverter *cnv, const int32_t *cx,
745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                          UChar32 cp,
746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                          const UChar **src, const UChar *srcLimit,
747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                          char **target, const char *targetLimit,
748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                          int32_t **offsets, int32_t srcIndex,
749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                          UBool flush,
750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                          UErrorCode *pErrorCode) {
75185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uint32_t value = 0;  /* initialize output-only param to 0 to silence gcc */
752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t match;
753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* try to match */
755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    match=ucnv_extMatchFromU(cx, cp,
756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             NULL, 0,
757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             *src, (int32_t)(srcLimit-*src),
758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             &value,
759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             cnv->useFallback, flush);
760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* reject a match if the result is a single byte for DBCS-only */
762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if( match>=2 &&
763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        !(UCNV_EXT_FROM_U_GET_LENGTH(value)==1 &&
764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru          cnv->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY)
765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ) {
766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* advance src pointer for the consumed input */
767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *src+=match-2; /* remove 2 for the initial code point */
768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* write result to target */
770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_extWriteFromU(cnv, cx,
771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           value,
772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           target, targetLimit,
773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           offsets, srcIndex,
774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           pErrorCode);
775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(match<0) {
777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* save state for partial match */
778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        const UChar *s;
779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t j;
780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* copy the first code point */
782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->preFromUFirstCP=cp;
783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* now copy the newly consumed input */
785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s=*src;
786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        match=-match-2; /* remove 2 for the initial code point */
787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for(j=0; j<match; ++j) {
788ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->preFromU[j]=*s++;
789ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
790ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *src=s; /* same as *src=srcLimit; because we reached the end of input */
791ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->preFromULength=(int8_t)match;
792ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
793ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(match==1) {
794ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* matched, no mapping but request for <subchar1> */
795ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->useSubChar1=TRUE;
796ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return FALSE;
797ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else /* match==0 no match */ {
798ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return FALSE;
799ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
800ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
801ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
802ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
803ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Used by ISO 2022 implementation.
804ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return number of bytes in *pValue; negative number if fallback; 0 for no mapping
805ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
806ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC int32_t
807ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extSimpleMatchFromU(const int32_t *cx,
808ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         UChar32 cp, uint32_t *pValue,
809ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         UBool useFallback) {
810ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t value;
811ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t match;
812ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
813ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* try to match */
814ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    match=ucnv_extMatchFromU(cx,
815ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             cp,
816ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             NULL, 0,
817ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             NULL, 0,
818ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             &value,
819ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             useFallback, TRUE);
820ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(match>=2) {
821ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* write result for simple, single-character conversion */
822ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t length;
823ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int isRoundtrip;
824ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
825ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        isRoundtrip=UCNV_EXT_FROM_U_IS_ROUNDTRIP(value);
826ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        length=UCNV_EXT_FROM_U_GET_LENGTH(value);
827ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value);
828ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
829ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) {
830ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *pValue=value;
831ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return isRoundtrip ? length : -length;
832ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if 0 /* not currently used */
833ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(length==4) {
834ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* de-serialize a 4-byte result */
835ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            const uint8_t *result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value;
836ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *pValue=
837ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ((uint32_t)result[0]<<24)|
838ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ((uint32_t)result[1]<<16)|
839ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ((uint32_t)result[2]<<8)|
840ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                result[3];
841ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return isRoundtrip ? 4 : -4;
842ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
843ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
844ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
845ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
846ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
847ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * return no match because
848ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - match>1 && resultLength>4: result too long for simple conversion
849ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - match==1: no match found, <subchar1> preferred
850ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - match==0: no match found in the first place
851ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * - match<0: partial match, not supported for simple conversion (and flush==TRUE)
852ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
853ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return 0;
854ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
855ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
856ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
857ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * continue partial match with new input, requires cnv->preFromUFirstCP>=0
858ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * never called for simple, single-character conversion
859ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
860ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void
861ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extContinueMatchFromU(UConverter *cnv,
862ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           UConverterFromUnicodeArgs *pArgs, int32_t srcIndex,
863ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           UErrorCode *pErrorCode) {
86485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    uint32_t value = 0;  /* initialize output-only param to 0 to silence gcc */
865ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t match;
866ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
867ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    match=ucnv_extMatchFromU(cnv->sharedData->mbcs.extIndexes,
868ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             cnv->preFromUFirstCP,
869ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             cnv->preFromU, cnv->preFromULength,
870ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source),
871ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             &value,
872ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                             cnv->useFallback, pArgs->flush);
873ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(match>=2) {
874ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        match-=2; /* remove 2 for the initial code point */
875ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
876ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(match>=cnv->preFromULength) {
877ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* advance src pointer for the consumed input */
878ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            pArgs->source+=match-cnv->preFromULength;
879ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->preFromULength=0;
880ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
881ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* the match did not use all of preFromU[] - keep the rest for replay */
882ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            int32_t length=cnv->preFromULength-match;
883ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            uprv_memmove(cnv->preFromU, cnv->preFromU+match, length*U_SIZEOF_UCHAR);
884ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->preFromULength=(int8_t)-length;
885ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
886ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
887ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* finish the partial match */
888ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->preFromUFirstCP=U_SENTINEL;
889ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
890ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* write result */
891ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_extWriteFromU(cnv, cnv->sharedData->mbcs.extIndexes,
892ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           value,
893ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           &pArgs->target, pArgs->targetLimit,
894ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           &pArgs->offsets, srcIndex,
895ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           pErrorCode);
896ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else if(match<0) {
897ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* save state for partial match */
898ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        const UChar *s;
899ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t j;
900ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
901ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* just _append_ the newly consumed input to preFromU[] */
902ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s=pArgs->source;
903ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        match=-match-2; /* remove 2 for the initial code point */
904ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for(j=cnv->preFromULength; j<match; ++j) {
905103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            U_ASSERT(j>=0);
906ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->preFromU[j]=*s++;
907ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
908ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */
909ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->preFromULength=(int8_t)match;
910ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else /* match==0 or 1 */ {
911ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /*
912ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * no match
913ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *
914ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * We need to split the previous input into two parts:
915ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *
916ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * 1. The first code point is unmappable - that's how we got into
917ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    trying the extension data in the first place.
918ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    We need to move it from the preFromU buffer
919ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    to the error buffer, set an error code,
920ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    and prepare the rest of the previous input for 2.
921ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *
922ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * 2. The rest of the previous input must be converted once we
923ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    come back from the callback for the first code point.
924ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    At that time, we have to try again from scratch to convert
925ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    these input characters.
926ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         *    The replay will be handled by the ucnv.c conversion code.
927ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         */
928ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
929ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(match==1) {
930ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* matched, no mapping but request for <subchar1> */
931ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            cnv->useSubChar1=TRUE;
932ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
933ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
934ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* move the first code point to the error field */
935ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->fromUChar32=cnv->preFromUFirstCP;
936ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->preFromUFirstCP=U_SENTINEL;
937ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
938ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* mark preFromU for replay */
939ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cnv->preFromULength=-cnv->preFromULength;
940ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
941ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* set the error code for unassigned */
942ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *pErrorCode=U_INVALID_CHAR_FOUND;
943ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
944ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
945ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
9468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusstatic UBool
9478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusextSetUseMapping(UConverterUnicodeSet which, int32_t minLength, uint32_t value) {
9488393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if(which==UCNV_ROUNDTRIP_SET) {
9498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        // Add only code points for which the roundtrip flag is set.
9508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        // Do not add any fallbacks, even if ucnv_fromUnicode() would use them
9518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        // (fallbacks from PUA). See the API docs for ucnv_getUnicodeSet().
9528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        //
9538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        // By analogy, also do not add "good one-way" mappings.
9548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        //
9558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        // Do not add entries with reserved bits set.
9568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))!=
9578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) {
9588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            return FALSE;
9598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
9608393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    } else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ {
9618393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        // Do not add entries with reserved bits set.
9628393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if((value&UCNV_EXT_FROM_U_RESERVED_MASK)!=0) {
9638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            return FALSE;
9648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        }
9658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    }
9668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // Do not add <subchar1> entries or other (future?) pseudo-entries
9678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    // with an output length of 0.
9688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    return UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength;
9698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius}
9708393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius
971ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
972ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData,
973ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            const int32_t *cx,
974ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            const USetAdder *sa,
9758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                            UConverterUnicodeSet which,
976ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            int32_t minLength,
9778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                            UChar32 firstCP,
978ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            UChar s[UCNV_EXT_MAX_UCHARS], int32_t length,
979ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            int32_t sectionIndex,
980ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            UErrorCode *pErrorCode) {
981ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UChar *fromUSectionUChars;
982ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint32_t *fromUSectionValues;
983ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
984ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t value;
985ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t i, count;
986ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
987ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fromUSectionUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar)+sectionIndex;
988ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fromUSectionValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t)+sectionIndex;
989ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
990ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* read first pair of the section */
991ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    count=*fromUSectionUChars++;
992ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    value=*fromUSectionValues++;
993ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
9948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius    if(extSetUseMapping(which, minLength, value)) {
9958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        if(length==U16_LENGTH(firstCP)) {
996ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* add the initial code point */
9978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius            sa->add(sa->set, firstCP);
998ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
999ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* add the string so far */
1000ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            sa->addString(sa->set, s, length);
1001ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1002ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1003ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1004ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(i=0; i<count; ++i) {
1005ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* append this code unit and recurse or add the string */
1006ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s[length]=fromUSectionUChars[i];
1007ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        value=fromUSectionValues[i];
1008ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1009ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(value==0) {
1010ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* no mapping, do nothing */
1011ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
1012ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ucnv_extGetUnicodeSetString(
10138393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                sharedData, cx, sa, which, minLength,
10148393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                firstCP, s, length+1,
1015ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
1016ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                pErrorCode);
10178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius        } else if(extSetUseMapping(which, minLength, value)) {
1018ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            sa->addString(sa->set, s, length+1);
1019ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1020ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1021ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
1022ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/*
 * Enumerates the from-Unicode extension trie and reports every code point
 * that has a usable mapping through sa->add(). Code points whose trie value
 * is "partial" are handed, as a string starting with that code point, to
 * ucnv_extGetUnicodeSetString().
 *
 * sharedData  supplies mbcs.extIndexes (used here as cx) and mbcs.outputType;
 *             if extIndexes is NULL the function returns without adding anything
 * sa          receives the results via sa->add(sa->set, c)
 * which       forwarded unchanged to extSetUseMapping() and to
 *             ucnv_extGetUnicodeSetString()
 * filter      selects the minimum length and an optional extra check on the
 *             mapping value (see the switch in the innermost loop)
 * pErrorCode  only forwarded to ucnv_extGetUnicodeSetString(); this function
 *             neither reads nor sets it itself
 */
1023ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CFUNC void
1024ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
1025ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                      const USetAdder *sa,
1026ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                      UConverterUnicodeSet which,
1027c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                      UConverterSetFilter filter,
1028ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                      UErrorCode *pErrorCode) {
1029ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const int32_t *cx;
1030ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint16_t *stage12, *stage3, *ps2, *ps3;
1031ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const uint32_t *stage3b;
1032ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1033ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t value;
1034ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t st1, stage1Length, st2, st3, minLength;
1035ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1036ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar s[UCNV_EXT_MAX_UCHARS];
1037ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 c;
1038ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t length;
1039ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
    /* without extension indexes there is nothing to enumerate */
1040ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cx=sharedData->mbcs.extIndexes;
1041ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(cx==NULL) {
1042ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
1043ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1044ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
    /* fetch the from-Unicode stage 1/2, stage 3 and stage 3b arrays through their cx index slots */
1045ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t);
1046ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t);
1047ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t);
1048ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1049ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    stage1Length=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH];
1050ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1051ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* enumerate the from-Unicode trie table */
1052ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    c=0; /* keep track of the current code point while enumerating */
1053ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
    /*
     * minLength is passed to extSetUseMapping() and to the string helper:
     * 3 for the 2022_CN filter, 2 for DBCS-only output or any other
     * non-NONE filter, 1 otherwise.
     */
1054c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    if(filter==UCNV_SET_FILTER_2022_CN) {
1055c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru        minLength=3;
1056c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    } else if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ||
1057c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru               filter!=UCNV_SET_FILTER_NONE
1058c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru    ) {
1059ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* DBCS-only, ignore single-byte results */
1060ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        minLength=2;
1061ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
1062ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        minLength=1;
1063ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1064ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1065ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /*
1066ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * the trie enumeration is almost the same as
1067ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * in MBCSGetUnicodeSet() for MBCS_OUTPUT_1
1068ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
1069ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(st1=0; st1<stage1Length; ++st1) {
1070ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        st2=stage12[st1];
        /* a stage 1 value not greater than stage1Length is treated as an empty stage 2 block (else branch) */
1071ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(st2>stage1Length) {
1072ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ps2=stage12+st2;
            /* a stage 2 block has 64 entries; each entry stands for 16 consecutive code points */
1073ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            for(st2=0; st2<64; ++st2) {
1074ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if((st3=(int32_t)ps2[st2]<<UCNV_EXT_STAGE_2_LEFT_SHIFT)!=0) {
1075ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* read the stage 3 block */
1076ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    ps3=stage3+st3;
1077ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
                    /* one iteration per code point; the loop ends when the low 4 bits of c wrap to 0 */
1078ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    do {
                        /* a stage 3 entry is an index into stage3b, which holds the mapping value */
1079ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        value=stage3b[*ps3++];
1080ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        if(value==0) {
1081ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            /* no mapping, do nothing */
1082ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
10838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                            // Recurse for partial results.
                            /* s is reset to hold just c (as UTF-16) before the helper extends it */
1084ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            length=0;
1085ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            U16_APPEND_UNSAFE(s, length, c);
1086ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            ucnv_extGetUnicodeSetString(
10878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                                sharedData, cx, sa, which, minLength,
1088ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                c, s, length,
1089ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
1090ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                pErrorCode);
10918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius                        } else if(extSetUseMapping(which, minLength, value)) {
                            /*
                             * Filter-specific checks on the mapping value. A failed
                             * check uses "continue", which in a do-while jumps to the
                             * loop condition: c is still advanced but is not added.
                             */
1092c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                            switch(filter) {
1093c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                            case UCNV_SET_FILTER_2022_CN:
                                /* keep only length-3 results whose data is at most 0x82ffff */
1094c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==3 && UCNV_EXT_FROM_U_GET_DATA(value)<=0x82ffff)) {
1095c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                    continue;
1096c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                }
1097c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                break;
1098c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                            case UCNV_SET_FILTER_SJIS:
                                /* keep only length-2 results with data in 0x8140..0xeffc; value is overwritten with the data */
1099c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && (value=UCNV_EXT_FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)) {
1100c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                    continue;
1101c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                }
1102c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                break;
1103c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                            case UCNV_SET_FILTER_GR94DBCS:
                                /*
                                 * keep only length-2 results; each unsigned-cast subtraction
                                 * is a single range check: low 16 bits of the data in
                                 * 0xa1a1..0xfefe and its low byte in 0xa1..0xfe
                                 */
1104c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 &&
1105c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                     (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfefe - 0xa1a1) &&
1106c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                     (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) {
1107c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                    continue;
1108c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                }
1109c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                break;
1110c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                            case UCNV_SET_FILTER_HZ:
                                /*
                                 * same shape as the GR94DBCS check, but the upper bound
                                 * of the 16-bit range is 0xfdfe instead of 0xfefe
                                 */
1111c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 &&
1112c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                     (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfdfe - 0xa1a1) &&
1113c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                     (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) {
1114c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                    continue;
1115c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                }
1116c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                break;
1117c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                            default:
1118c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                /*
1119c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                 * UCNV_SET_FILTER_NONE,
1120c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                 * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength
1121c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                 */
1122c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                                break;
1123c0f3e2506e4cc62ff8c220fe72849728e9d6cecfJean-Baptiste Queru                            }
                            /* the mapping passed every check: report the code point */
1124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            sa->add(sa->set, c);
1125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        }
1126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    } while((++c&0xf)!=0);
1127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
1128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    c+=16; /* empty stage 3 block */
1129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
1130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
1131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
1132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c+=1024; /* empty stage 2 block */
1133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
1134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
1135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
1136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
1137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
1138