/*
******************************************************************************
*
*   Copyright (C) 2001-2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*
* File ustrtrns.c
*
* Modification History:
*
*   Date        Name        Description
*   9/10/2001    Ram    Creation.
******************************************************************************
*/
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/*******************************************************************************
 *
 * u_strTo* and u_strFrom* APIs
 * WCS functions moved to ustr_wcs.c for better modularization
 *
 *******************************************************************************
 */
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/putil.h"
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h"
29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h"
30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h"
31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ustr_imp.h"
32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar* U_EXPORT2
34b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruu_strFromUTF32WithSub(UChar *dest,
35b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru               int32_t destCapacity,
36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru               int32_t *pDestLength,
37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru               const UChar32 *src,
38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru               int32_t srcLength,
39b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru               UChar32 subchar, int32_t *pNumSubstitutions,
40b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru               UErrorCode *pErrorCode) {
41b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    const UChar32 *srcLimit;
42b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UChar32 ch;
43b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UChar *destLimit;
44b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UChar *pDest;
45b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    int32_t reqLength;
46b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    int32_t numSubstitutions;
47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* args check */
49b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if(U_FAILURE(*pErrorCode)){
50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
5350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
54b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        subchar > 0x10ffff || U_IS_SURROGATE(subchar)
55b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    ) {
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
60b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if(pNumSubstitutions != NULL) {
61b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        *pNumSubstitutions = 0;
62b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
63b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
64b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    pDest = dest;
65b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    destLimit = dest + destCapacity;
66b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    reqLength = 0;
67b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    numSubstitutions = 0;
68b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
69b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if(srcLength < 0) {
70b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        /* simple loop for conversion of a NUL-terminated BMP string */
71b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        while((ch=*src) != 0 &&
72b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru              ((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff))) {
73b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            ++src;
74b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            if(pDest < destLimit) {
75b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                *pDest++ = (UChar)ch;
76b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            } else {
77b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                ++reqLength;
78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
80b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        srcLimit = src;
81b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        if(ch != 0) {
82b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            /* "complicated" case, find the end of the remaining string */
83b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            while(*++srcLimit != 0) {}
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
85b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    } else {
86b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        srcLimit = src + srcLength;
87b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
88b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
89b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    /* convert with length */
90b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    while(src < srcLimit) {
91b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        ch = *src++;
92b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        do {
93b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            /* usually "loops" once; twice only for writing subchar */
94b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            if((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff)) {
95b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                if(pDest < destLimit) {
96b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    *pDest++ = (UChar)ch;
97b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                } else {
98b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    ++reqLength;
99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
100b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                break;
101b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            } else if(0x10000 <= ch && ch <= 0x10ffff) {
102b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                if((pDest + 2) <= destLimit) {
103b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    *pDest++ = U16_LEAD(ch);
104b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    *pDest++ = U16_TRAIL(ch);
105b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                } else {
106b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    reqLength += 2;
107b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                }
108b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                break;
109b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            } else if((ch = subchar) < 0) {
110b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                /* surrogate code point, or not a Unicode code point at all */
111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                *pErrorCode = U_INVALID_CHAR_FOUND;
112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return NULL;
113b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            } else {
114b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                ++numSubstitutions;
115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
116b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        } while(TRUE);
117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    reqLength += (int32_t)(pDest - dest);
120b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if(pDestLength) {
121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDestLength = reqLength;
122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
123b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if(pNumSubstitutions != NULL) {
124b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        *pNumSubstitutions = numSubstitutions;
125b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Terminate the buffer */
128b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return dest;
131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
133b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI UChar* U_EXPORT2
134b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruu_strFromUTF32(UChar *dest,
135b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru               int32_t destCapacity,
136b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru               int32_t *pDestLength,
137b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru               const UChar32 *src,
138b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru               int32_t srcLength,
139b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru               UErrorCode *pErrorCode) {
140b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    return u_strFromUTF32WithSub(
141b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            dest, destCapacity, pDestLength,
142b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            src, srcLength,
143b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            U_SENTINEL, NULL,
144b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            pErrorCode);
145b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru}
146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar32* U_EXPORT2
148b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruu_strToUTF32WithSub(UChar32 *dest,
149b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             int32_t destCapacity,
150b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             int32_t *pDestLength,
151b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             const UChar *src,
152b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             int32_t srcLength,
153b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             UChar32 subchar, int32_t *pNumSubstitutions,
154b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             UErrorCode *pErrorCode) {
155b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    const UChar *srcLimit;
156b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UChar32 ch;
157b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UChar ch2;
158b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UChar32 *destLimit;
159b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UChar32 *pDest;
160b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    int32_t reqLength;
161b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    int32_t numSubstitutions;
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* args check */
164b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if(U_FAILURE(*pErrorCode)){
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
16750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
16850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
169b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        subchar > 0x10ffff || U_IS_SURROGATE(subchar)
170b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    ) {
171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
175b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if(pNumSubstitutions != NULL) {
176b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        *pNumSubstitutions = 0;
177b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
178b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
179b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    pDest = dest;
180b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    destLimit = dest + destCapacity;
181b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    reqLength = 0;
182b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    numSubstitutions = 0;
183b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
184b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if(srcLength < 0) {
185b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        /* simple loop for conversion of a NUL-terminated BMP string */
186b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        while((ch=*src) != 0 && !U16_IS_SURROGATE(ch)) {
187b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            ++src;
188b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            if(pDest < destLimit) {
189b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                *pDest++ = ch;
190b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            } else {
191b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                ++reqLength;
192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
194b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        srcLimit = src;
195b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        if(ch != 0) {
196b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            /* "complicated" case, find the end of the remaining string */
197b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            while(*++srcLimit != 0) {}
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
200b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        srcLimit = src + srcLength;
201b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
202b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
203b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    /* convert with length */
204b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    while(src < srcLimit) {
205b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        ch = *src++;
206b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        if(!U16_IS_SURROGATE(ch)) {
207b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            /* write or count ch below */
208b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        } else if(U16_IS_SURROGATE_LEAD(ch) && src < srcLimit && U16_IS_TRAIL(ch2 = *src)) {
209b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            ++src;
210b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            ch = U16_GET_SUPPLEMENTARY(ch, ch2);
211b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        } else if((ch = subchar) < 0) {
212b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            /* unpaired surrogate */
213b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            *pErrorCode = U_INVALID_CHAR_FOUND;
214b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            return NULL;
215b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        } else {
216b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            ++numSubstitutions;
217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
218b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        if(pDest < destLimit) {
219b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            *pDest++ = ch;
220b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        } else {
221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++reqLength;
222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
225b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    reqLength += (int32_t)(pDest - dest);
226b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if(pDestLength) {
227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDestLength = reqLength;
228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
229b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if(pNumSubstitutions != NULL) {
230b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        *pNumSubstitutions = numSubstitutions;
231b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Terminate the buffer */
234b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    u_terminateUChar32s(dest, destCapacity, reqLength, pErrorCode);
235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return dest;
237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
239b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI UChar32* U_EXPORT2
240b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruu_strToUTF32(UChar32 *dest,
241b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             int32_t destCapacity,
242b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             int32_t *pDestLength,
243b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             const UChar *src,
244b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             int32_t srcLength,
245b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             UErrorCode *pErrorCode) {
246b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    return u_strToUTF32WithSub(
247b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            dest, destCapacity, pDestLength,
248b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            src, srcLength,
249b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            U_SENTINEL, NULL,
250b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            pErrorCode);
251b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru}
252b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* for utf8_nextCharSafeBodyTerminated() */
254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar32
255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruutf8_minLegal[4]={ 0, 0x80, 0x800, 0x10000 };
256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Version of utf8_nextCharSafeBody() with the following differences:
259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - checks for NUL termination instead of length
260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - works with pointers instead of indexes
261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - always strict (strict==-1)
262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * *ps points to after the lead byte and will be moved to after the last trail byte.
264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * c is the lead byte.
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the code point, or U_SENTINEL
266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UChar32
268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruutf8_nextCharSafeBodyTerminated(const uint8_t **ps, UChar32 c) {
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const uint8_t *s=*ps;
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t trail, illegal=0;
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t count=UTF8_COUNT_TRAIL_BYTES(c);
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UTF8_MASK_LEAD_BYTE((c), count);
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    switch(count) {
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* each branch falls through to the next one */
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 5:
277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 4:
278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* count>=4 is always illegal: no more than 3 trail bytes in Unicode's UTF-8 */
279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        illegal=1;
280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 3:
282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        trail=(uint8_t)(*s++ - 0x80);
283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        c=(c<<6)|trail;
284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(trail>0x3f || c>=0x110) {
285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* not a trail byte, or code point>0x10ffff (outside Unicode) */
286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            illegal=1;
287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 2:
290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        trail=(uint8_t)(*s++ - 0x80);
291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(trail>0x3f) {
292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* not a trail byte */
293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            illegal=1;
294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        c=(c<<6)|trail;
297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 1:
298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        trail=(uint8_t)(*s++ - 0x80);
299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(trail>0x3f) {
300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* not a trail byte */
301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            illegal=1;
302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        c=(c<<6)|trail;
304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 0:
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return U_SENTINEL;
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* no default branch to optimize switch()  - all values are covered */
308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* correct sequence - all trail bytes have (b7..b6)==(10)? */
311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* illegal is also set if count>=4 */
312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(illegal || c<utf8_minLegal[count] || UTF_IS_SURROGATE(c)) {
313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* error handling */
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* don't go beyond this sequence */
315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        s=*ps;
316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while(count>0 && UTF8_IS_TRAIL(*s)) {
317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++s;
318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            --count;
319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        c=U_SENTINEL;
321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *ps=s;
323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return c;
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Version of utf8_nextCharSafeBody() with the following differences:
328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - works with pointers instead of indexes
329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - always strict (strict==-1)
330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * *ps points to after the lead byte and will be moved to after the last trail byte.
332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * c is the lead byte.
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the code point, or U_SENTINEL
334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UChar32
336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruutf8_nextCharSafeBodyPointer(const uint8_t **ps, const uint8_t *limit, UChar32 c) {
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const uint8_t *s=*ps;
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t trail, illegal=0;
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t count=UTF8_COUNT_TRAIL_BYTES(c);
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if((limit-s)>=count) {
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UTF8_MASK_LEAD_BYTE((c), count);
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        switch(count) {
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* each branch falls through to the next one */
345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 5:
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 4:
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* count>=4 is always illegal: no more than 3 trail bytes in Unicode's UTF-8 */
348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            illegal=1;
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 3:
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            trail=*s++;
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=(c<<6)|(trail&0x3f);
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(c<0x110) {
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                illegal|=(trail&0xc0)^0x80;
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* code point>0x10ffff, outside Unicode */
357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                illegal=1;
358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 2:
361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            trail=*s++;
362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=(c<<6)|(trail&0x3f);
363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            illegal|=(trail&0xc0)^0x80;
364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 1:
365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            trail=*s++;
366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=(c<<6)|(trail&0x3f);
367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            illegal|=(trail&0xc0)^0x80;
368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 0:
370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return U_SENTINEL;
371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* no default branch to optimize switch()  - all values are covered */
372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        illegal=1; /* too few bytes left */
375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* correct sequence - all trail bytes have (b7..b6)==(10)? */
378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* illegal is also set if count>=4 */
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(illegal || c<utf8_minLegal[count] || UTF_IS_SURROGATE(c)) {
380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* error handling */
381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* don't go beyond this sequence */
382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        s=*ps;
383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while(count>0 && s<limit && UTF8_IS_TRAIL(*s)) {
384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++s;
385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            --count;
386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        c=U_SENTINEL;
388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *ps=s;
390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return c;
391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar* U_EXPORT2
394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strFromUTF8WithSub(UChar *dest,
395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              int32_t destCapacity,
396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              int32_t *pDestLength,
397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              const char* src,
398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              int32_t srcLength,
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              UChar32 subchar, int32_t *pNumSubstitutions,
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              UErrorCode *pErrorCode){
401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar *pDest = dest;
402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar *pDestLimit = dest+destCapacity;
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 ch;
404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t reqLength = 0;
405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const uint8_t* pSrc = (const uint8_t*) src;
406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t t1, t2; /* trail bytes */
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t numSubstitutions;
408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* args check */
410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
41450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
41550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        subchar > 0x10ffff || U_IS_SURROGATE(subchar)
417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ) {
418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
422b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if(pNumSubstitutions!=NULL) {
423b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        *pNumSubstitutions=0;
424b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    numSubstitutions=0;
426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*
428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Inline processing of UTF-8 byte sequences:
429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *
430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Byte sequences for the most common characters are handled inline in
431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * the conversion loops. In order to reduce the path lengths for those
432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * characters, the tests are arranged in a kind of binary search.
433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * ASCII (<=0x7f) is checked first, followed by the dividing point
434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * between 2- and 3-byte sequences (0xe0).
435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * The 3-byte branch is tested first to speed up CJK text.
436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * The compiler should combine the subtractions for the two tests for 0xe0.
437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Each branch then tests for the other end of its range.
438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(srcLength < 0){
441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * Transform a NUL-terminated string.
443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * The code explicitly checks for NULs only in the lead byte position.
444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * A NUL byte in the trail byte position fails the trail byte range check anyway.
445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         */
446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while(((ch = *pSrc) != 0) && (pDest < pDestLimit)) {
447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(ch <= 0x7f){
448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                *pDest++=(UChar)ch;
449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pSrc;
450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(ch > 0xe0) {
452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if( /* handle U+1000..U+CFFF inline */
453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ch <= 0xec &&
454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ) {
457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        pSrc += 3;
460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        continue;
461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if(ch < 0xe0) {
463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if( /* handle U+0080..U+07FF inline */
464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ch >= 0xc2 &&
465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ) {
467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        pSrc += 2;
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        continue;
470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* function call for "complicated" and error cases */
474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pSrc; /* continue after the lead byte */
475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ch=utf8_nextCharSafeBodyTerminated(&pSrc, ch);
476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(ch<0 && (++numSubstitutions, ch = subchar) < 0) {
477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pErrorCode = U_INVALID_CHAR_FOUND;
478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return NULL;
479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if(ch<=0xFFFF) {
480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *(pDest++)=(UChar)ch;
481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *(pDest++)=UTF16_LEAD(ch);
483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(pDest<pDestLimit) {
484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *(pDest++)=UTF16_TRAIL(ch);
485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else {
486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        reqLength++;
487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break;
488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Pre-flight the rest of the string. */
494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while((ch = *pSrc) != 0) {
495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(ch <= 0x7f){
496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++reqLength;
497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pSrc;
498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(ch > 0xe0) {
500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if( /* handle U+1000..U+CFFF inline */
501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ch <= 0xec &&
502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (uint8_t)(pSrc[1] - 0x80) <= 0x3f &&
503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (uint8_t)(pSrc[2] - 0x80) <= 0x3f
504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ) {
505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ++reqLength;
506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        pSrc += 3;
507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        continue;
508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if(ch < 0xe0) {
510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if( /* handle U+0080..U+07FF inline */
511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ch >= 0xc2 &&
512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (uint8_t)(pSrc[1] - 0x80) <= 0x3f
513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ) {
514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ++reqLength;
515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        pSrc += 2;
516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        continue;
517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* function call for "complicated" and error cases */
521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pSrc; /* continue after the lead byte */
522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ch=utf8_nextCharSafeBodyTerminated(&pSrc, ch);
523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(ch<0 && (++numSubstitutions, ch = subchar) < 0) {
524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pErrorCode = U_INVALID_CHAR_FOUND;
525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return NULL;
526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                reqLength += U16_LENGTH(ch);
528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else /* srcLength >= 0 */ {
531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const uint8_t *pSrcLimit = pSrc + srcLength;
532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t count;
533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(;;) {
536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /*
537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             * Each iteration of the inner loop progresses by at most 3 UTF-8
538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             * bytes and one UChar, for most characters.
539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             * For supplementary code points (4 & 2), which are rare,
540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             * there is an additional adjustment.
541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             */
542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count = (int32_t)(pDestLimit - pDest);
543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            srcLength = (int32_t)((pSrcLimit - pSrc) / 3);
544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(count > srcLength) {
545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                count = srcLength; /* min(remaining dest, remaining src/3) */
546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(count < 3) {
548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /*
549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * Too much overhead if we get near the end of the string,
550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * continue with the next loop.
551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 */
552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            do {
556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ch = *pSrc;
557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(ch <= 0x7f){
558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(UChar)ch;
559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ++pSrc;
560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(ch > 0xe0) {
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if( /* handle U+1000..U+CFFF inline */
563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            ch <= 0xec &&
564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ) {
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            pSrc += 3;
570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            continue;
571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else if(ch < 0xe0) {
573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if( /* handle U+0080..U+07FF inline */
574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            ch >= 0xc2 &&
575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ) {
577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            pSrc += 2;
579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            continue;
580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(ch >= 0xf0 || subchar > 0xffff) {
584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        /*
585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         * We may read up to six bytes and write up to two UChars,
586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         * which we didn't account for with computing count,
587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         * so we adjust it here.
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         */
589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if(--count == 0) {
590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            break;
591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* function call for "complicated" and error cases */
595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ++pSrc; /* continue after the lead byte */
596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch=utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(ch<0 && (++numSubstitutions, ch = subchar) < 0){
598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *pErrorCode = U_INVALID_CHAR_FOUND;
599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return NULL;
600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }else if(ch<=0xFFFF){
601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *(pDest++)=(UChar)ch;
602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }else{
603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *(pDest++)=UTF16_LEAD(ch);
60450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *(pDest++)=UTF16_TRAIL(ch);
605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } while(--count > 0);
608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while((pSrc<pSrcLimit) && (pDest<pDestLimit)) {
611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ch = *pSrc;
612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(ch <= 0x7f){
613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                *pDest++=(UChar)ch;
614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pSrc;
615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(ch > 0xe0) {
617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if( /* handle U+1000..U+CFFF inline */
618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ch <= 0xec &&
619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ((pSrcLimit - pSrc) >= 3) &&
620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ) {
623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        pSrc += 3;
626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        continue;
627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if(ch < 0xe0) {
629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if( /* handle U+0080..U+07FF inline */
630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ch >= 0xc2 &&
631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ((pSrcLimit - pSrc) >= 2) &&
632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ) {
634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        pSrc += 2;
636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        continue;
637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* function call for "complicated" and error cases */
641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pSrc; /* continue after the lead byte */
642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ch=utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(ch<0 && (++numSubstitutions, ch = subchar) < 0){
644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pErrorCode = U_INVALID_CHAR_FOUND;
645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return NULL;
646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }else if(ch<=0xFFFF){
647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *(pDest++)=(UChar)ch;
648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }else{
649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *(pDest++)=UTF16_LEAD(ch);
650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(pDest<pDestLimit){
651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *(pDest++)=UTF16_TRAIL(ch);
652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }else{
653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        reqLength++;
654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break;
655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
65950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /* do not fill the dest buffer just count the UChars needed */
660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while(pSrc < pSrcLimit){
661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ch = *pSrc;
662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(ch <= 0x7f){
663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                reqLength++;
664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pSrc;
665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(ch > 0xe0) {
667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if( /* handle U+1000..U+CFFF inline */
668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ch <= 0xec &&
669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ((pSrcLimit - pSrc) >= 3) &&
670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (uint8_t)(pSrc[1] - 0x80) <= 0x3f &&
671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (uint8_t)(pSrc[2] - 0x80) <= 0x3f
672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ) {
673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        reqLength++;
674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        pSrc += 3;
675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        continue;
676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if(ch < 0xe0) {
678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if( /* handle U+0080..U+07FF inline */
679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ch >= 0xc2 &&
680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ((pSrcLimit - pSrc) >= 2) &&
681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (uint8_t)(pSrc[1] - 0x80) <= 0x3f
682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ) {
683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        reqLength++;
684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        pSrc += 2;
685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        continue;
686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* function call for "complicated" and error cases */
690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pSrc; /* continue after the lead byte */
691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ch=utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(ch<0 && (++numSubstitutions, ch = subchar) < 0){
693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pErrorCode = U_INVALID_CHAR_FOUND;
694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return NULL;
695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                reqLength+=UTF_CHAR_LENGTH(ch);
697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    reqLength+=(int32_t)(pDest - dest);
702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(pNumSubstitutions!=NULL) {
704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pNumSubstitutions=numSubstitutions;
705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(pDestLength){
708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDestLength = reqLength;
709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Terminate the buffer */
712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return dest;
715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar* U_EXPORT2
718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strFromUTF8(UChar *dest,
719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              int32_t destCapacity,
720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              int32_t *pDestLength,
721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              const char* src,
722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              int32_t srcLength,
723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              UErrorCode *pErrorCode){
724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return u_strFromUTF8WithSub(
725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            dest, destCapacity, pDestLength,
726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            src, srcLength,
727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U_SENTINEL, NULL,
728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pErrorCode);
729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar * U_EXPORT2
732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strFromUTF8Lenient(UChar *dest,
733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     int32_t destCapacity,
734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     int32_t *pDestLength,
735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     const char *src,
736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     int32_t srcLength,
737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     UErrorCode *pErrorCode) {
738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar *pDest = dest;
739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 ch;
740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t reqLength = 0;
741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t* pSrc = (uint8_t*) src;
742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* args check */
744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
74850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
74950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        (destCapacity<0) || (dest == NULL && destCapacity > 0)
75050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ) {
751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(srcLength < 0) {
756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Transform a NUL-terminated string. */
757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar *pDestLimit = dest+destCapacity;
758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint8_t t1, t2, t3; /* trail bytes */
759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while(((ch = *pSrc) != 0) && (pDest < pDestLimit)) {
761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(ch < 0xc0) {
762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /*
763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * ASCII, or a trail byte in lead position which is treated like
764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * a single-byte sequence for better character boundary
765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * resynchronization after illegal sequences.
766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 */
767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                *pDest++=(UChar)ch;
768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pSrc;
769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(ch < 0xe0) { /* U+0080..U+07FF */
771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if((t1 = pSrc[1]) != 0) {
772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* 0x3080 = (0xc0 << 6) + 0x80 */
773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++ = (UChar)((ch << 6) + t1 - 0x3080);
774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pSrc += 2;
775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(ch < 0xf0) { /* U+0800..U+FFFF */
778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0) {
779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* 0x2080 = (0x80 << 6) + 0x80 */
781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++ = (UChar)((ch << 12) + (t1 << 6) + t2 - 0x2080);
782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pSrc += 3;
783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else /* f0..f4 */ { /* U+10000..U+10FFFF */
786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0 && (t3 = pSrc[3]) != 0) {
787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pSrc += 4;
788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch = (ch << 18) + (t1 << 12) + (t2 << 6) + t3 - 0x3c82080;
790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *(pDest++) = U16_LEAD(ch);
791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(pDest < pDestLimit) {
792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *(pDest++) = U16_TRAIL(ch);
793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else {
794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        reqLength = 1;
795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break;
796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* truncated character at the end */
802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *pDest++ = 0xfffd;
803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while(*++pSrc != 0) {}
804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Pre-flight the rest of the string. */
808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while((ch = *pSrc) != 0) {
809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(ch < 0xc0) {
810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /*
811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * ASCII, or a trail byte in lead position which is treated like
812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * a single-byte sequence for better character boundary
813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * resynchronization after illegal sequences.
814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 */
815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++reqLength;
816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pSrc;
817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(ch < 0xe0) { /* U+0080..U+07FF */
819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(pSrc[1] != 0) {
820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ++reqLength;
821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pSrc += 2;
822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(ch < 0xf0) { /* U+0800..U+FFFF */
825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(pSrc[1] != 0 && pSrc[2] != 0) {
826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ++reqLength;
827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pSrc += 3;
828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else /* f0..f4 */ { /* U+10000..U+10FFFF */
831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(pSrc[1] != 0 && pSrc[2] != 0 && pSrc[3] != 0) {
832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    reqLength += 2;
833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pSrc += 4;
834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* truncated character at the end */
839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++reqLength;
840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else /* srcLength >= 0 */ {
843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const uint8_t *pSrcLimit = pSrc + srcLength;
844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * This function requires that if srcLength is given, then it must be
847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * destCapatity >= srcLength so that we need not check for
848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * destination buffer overflow in the loop.
849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         */
850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(destCapacity < srcLength) {
851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(pDestLength != NULL) {
852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                *pDestLength = srcLength; /* this likely overestimates the true destLength! */
853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return NULL;
856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if((pSrcLimit - pSrc) >= 4) {
859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pSrcLimit -= 3; /* temporarily reduce pSrcLimit */
860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* in this loop, we can always access at least 4 bytes, up to pSrc+3 */
862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            do {
863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ch = *pSrc++;
864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(ch < 0xc0) {
865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /*
866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     * ASCII, or a trail byte in lead position which is treated like
867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     * a single-byte sequence for better character boundary
868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     * resynchronization after illegal sequences.
869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     */
870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(UChar)ch;
871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if(ch < 0xe0) { /* U+0080..U+07FF */
872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* 0x3080 = (0xc0 << 6) + 0x80 */
873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080);
874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if(ch < 0xf0) { /* U+0800..U+FFFF */
875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* 0x2080 = (0x80 << 6) + 0x80 */
877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch = (ch << 12) + (*pSrc++ << 6);
878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++ = (UChar)(ch + *pSrc++ - 0x2080);
879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else /* f0..f4 */ { /* U+10000..U+10FFFF */
880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch = (ch << 18) + (*pSrc++ << 12);
882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch += *pSrc++ << 6;
883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch += *pSrc++ - 0x3c82080;
884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *(pDest++) = U16_LEAD(ch);
885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *(pDest++) = U16_TRAIL(ch);
886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } while(pSrc < pSrcLimit);
888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pSrcLimit += 3; /* restore original pSrcLimit */
890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while(pSrc < pSrcLimit) {
893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ch = *pSrc++;
894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(ch < 0xc0) {
895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /*
896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * ASCII, or a trail byte in lead position which is treated like
897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * a single-byte sequence for better character boundary
898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * resynchronization after illegal sequences.
899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 */
900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                *pDest++=(UChar)ch;
901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(ch < 0xe0) { /* U+0080..U+07FF */
903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(pSrc < pSrcLimit) {
904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* 0x3080 = (0xc0 << 6) + 0x80 */
905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080);
906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(ch < 0xf0) { /* U+0800..U+FFFF */
909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if((pSrcLimit - pSrc) >= 2) {
910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* 0x2080 = (0x80 << 6) + 0x80 */
912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch = (ch << 12) + (*pSrc++ << 6);
913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++ = (UChar)(ch + *pSrc++ - 0x2080);
914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pSrc += 3;
915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else /* f0..f4 */ { /* U+10000..U+10FFFF */
918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if((pSrcLimit - pSrc) >= 3) {
919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch = (ch << 18) + (*pSrc++ << 12);
921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch += *pSrc++ << 6;
922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch += *pSrc++ - 0x3c82080;
923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *(pDest++) = U16_LEAD(ch);
924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *(pDest++) = U16_TRAIL(ch);
925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pSrc += 4;
926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* truncated character at the end */
931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *pDest++ = 0xfffd;
932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    reqLength+=(int32_t)(pDest - dest);
937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(pDestLength){
939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDestLength = reqLength;
940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Terminate the buffer */
943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return dest;
946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic U_INLINE uint8_t *
949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru_appendUTF8(uint8_t *pDest, UChar32 c) {
950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* it is 0<=c<=0x10ffff and not a surrogate if called by a validating function */
951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if((c)<=0x7f) {
952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDest++=(uint8_t)c;
953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(c<=0x7ff) {
954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDest++=(uint8_t)((c>>6)|0xc0);
955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDest++=(uint8_t)((c&0x3f)|0x80);
956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(c<=0xffff) {
957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDest++=(uint8_t)((c>>12)|0xe0);
958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDest++=(uint8_t)(((c>>6)&0x3f)|0x80);
959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDest++=(uint8_t)(((c)&0x3f)|0x80);
960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else /* if((uint32_t)(c)<=0x10ffff) */ {
961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDest++=(uint8_t)(((c)>>18)|0xf0);
962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDest++=(uint8_t)((((c)>>12)&0x3f)|0x80);
963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDest++=(uint8_t)((((c)>>6)&0x3f)|0x80);
964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDest++=(uint8_t)(((c)&0x3f)|0x80);
965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return pDest;
967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI char* U_EXPORT2
971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strToUTF8WithSub(char *dest,
972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t destCapacity,
973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t *pDestLength,
974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const UChar *pSrc,
975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t srcLength,
976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32 subchar, int32_t *pNumSubstitutions,
977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UErrorCode *pErrorCode){
978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t reqLength=0;
979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t ch=0,ch2=0;
980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *pDest = (uint8_t *)dest;
981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *pDestLimit = pDest + destCapacity;
982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t numSubstitutions;
983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* args check */
985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
98950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if( (pSrc==NULL && srcLength!=0) || srcLength < -1 ||
99050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        subchar > 0x10ffff || U_IS_SURROGATE(subchar)
992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ) {
993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
997b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if(pNumSubstitutions!=NULL) {
998b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        *pNumSubstitutions=0;
999b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    numSubstitutions=0;
1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(srcLength==-1) {
1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while((ch=*pSrc)!=0) {
1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++pSrc;
1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(ch <= 0x7f) {
1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(pDest<pDestLimit) {
100750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *pDest++ = (uint8_t)ch;
1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    reqLength = 1;
1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(ch <= 0x7ff) {
1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if((pDestLimit - pDest) >= 2) {
1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)((ch>>6)|0xc0);
1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)((ch&0x3f)|0x80);
1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    reqLength = 2;
1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(ch <= 0xd7ff || ch >= 0xe000) {
1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if((pDestLimit - pDest) >= 3) {
1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)((ch>>12)|0xe0);
1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)((ch&0x3f)|0x80);
1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    reqLength = 3;
1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else /* ch is a surrogate */ {
1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t length;
1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /*need not check for NUL because NUL fails UTF_IS_TRAIL() anyway*/
1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ++pSrc;
1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch=UTF16_GET_PAIR_VALUE(ch, ch2);
1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if(subchar>=0) {
1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch=subchar;
1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ++numSubstitutions;
1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* Unicode 3.2 forbids surrogate code points in UTF-8 */
1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pErrorCode = U_INVALID_CHAR_FOUND;
1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return NULL;
1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                length = U8_LENGTH(ch);
1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if((pDestLimit - pDest) >= length) {
1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* convert and append*/
1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pDest=_appendUTF8(pDest, ch);
1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    reqLength = length;
1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
1052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while((ch=*pSrc++)!=0) {
1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(ch<=0x7f) {
1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++reqLength;
1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(ch<=0x7ff) {
1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                reqLength+=2;
1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(!UTF_IS_SURROGATE(ch)) {
1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                reqLength+=3;
1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pSrc;
1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                reqLength+=4;
1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(subchar>=0) {
1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                reqLength+=U8_LENGTH(subchar);
1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++numSubstitutions;
1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* Unicode 3.2 forbids surrogate code points in UTF-8 */
1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                *pErrorCode = U_INVALID_CHAR_FOUND;
1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return NULL;
1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UChar *pSrcLimit = pSrc+srcLength;
1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t count;
1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(;;) {
1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /*
1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             * Each iteration of the inner loop progresses by at most 3 UTF-8
1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             * bytes and one UChar, for most characters.
1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             * For supplementary code points (4 & 2), which are rare,
1084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             * there is an additional adjustment.
1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             */
1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count = (int32_t)((pDestLimit - pDest) / 3);
1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            srcLength = (int32_t)(pSrcLimit - pSrc);
1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(count > srcLength) {
1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                count = srcLength; /* min(remaining dest/3, remaining src) */
1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(count < 3) {
1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /*
1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * Too much overhead if we get near the end of the string,
1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * continue with the next loop.
1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 */
1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            do {
1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ch=*pSrc++;
1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(ch <= 0x7f) {
110150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *pDest++ = (uint8_t)ch;
1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if(ch <= 0x7ff) {
1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)((ch>>6)|0xc0);
1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)((ch&0x3f)|0x80);
1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if(ch <= 0xd7ff || ch >= 0xe000) {
1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)((ch>>12)|0xe0);
1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)((ch&0x3f)|0x80);
1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else /* ch is a surrogate */ {
1110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /*
1111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     * We will read two UChars and probably output four bytes,
1112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     * which we didn't account for with computing count,
1113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     * so we adjust it here.
1114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     */
1115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(--count == 0) {
1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        --pSrc; /* undo ch=*pSrc++ for the lead surrogate */
1117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break;  /* recompute count */
1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
1119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_TRAIL(ch2=*pSrc)) {
1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ++pSrc;
1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ch=UTF16_GET_PAIR_VALUE(ch, ch2);
1123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        /* writing 4 bytes per 2 UChars is ok */
1125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *pDest++=(uint8_t)((ch>>18)|0xf0);
1126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *pDest++=(uint8_t)(((ch>>12)&0x3f)|0x80);
1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *pDest++=(uint8_t)((ch&0x3f)|0x80);
1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else  {
1130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        /* Unicode 3.2 forbids surrogate code points in UTF-8 */
1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if(subchar>=0) {
1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            ch=subchar;
1133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            ++numSubstitutions;
1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        } else {
1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            *pErrorCode = U_INVALID_CHAR_FOUND;
1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            return NULL;
1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
1138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        /* convert and append*/
1140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        pDest=_appendUTF8(pDest, ch);
1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } while(--count > 0);
1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while(pSrc<pSrcLimit) {
1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ch=*pSrc++;
1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(ch <= 0x7f) {
1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(pDest<pDestLimit) {
115050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *pDest++ = (uint8_t)ch;
1151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    reqLength = 1;
1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(ch <= 0x7ff) {
1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if((pDestLimit - pDest) >= 2) {
1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)((ch>>6)|0xc0);
1158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)((ch&0x3f)|0x80);
1159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    reqLength = 2;
1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(ch <= 0xd7ff || ch >= 0xe000) {
1164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if((pDestLimit - pDest) >= 3) {
1165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)((ch>>12)|0xe0);
1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
1167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)((ch&0x3f)|0x80);
1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    reqLength = 3;
1170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
1171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else /* ch is a surrogate */ {
1173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t length;
1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(UTF_IS_SURROGATE_FIRST(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) {
1176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ++pSrc;
1177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch=UTF16_GET_PAIR_VALUE(ch, ch2);
1178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if(subchar>=0) {
1179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch=subchar;
1180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ++numSubstitutions;
1181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* Unicode 3.2 forbids surrogate code points in UTF-8 */
1183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pErrorCode = U_INVALID_CHAR_FOUND;
1184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return NULL;
1185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                length = U8_LENGTH(ch);
1188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if((pDestLimit - pDest) >= length) {
1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* convert and append*/
1190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pDest=_appendUTF8(pDest, ch);
1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    reqLength = length;
1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
1194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while(pSrc<pSrcLimit) {
1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ch=*pSrc++;
1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(ch<=0x7f) {
1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++reqLength;
1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(ch<=0x7ff) {
1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                reqLength+=2;
1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(!UTF_IS_SURROGATE(ch)) {
1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                reqLength+=3;
1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(UTF_IS_SURROGATE_FIRST(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) {
1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pSrc;
1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                reqLength+=4;
1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(subchar>=0) {
1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                reqLength+=U8_LENGTH(subchar);
1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++numSubstitutions;
1211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* Unicode 3.2 forbids surrogate code points in UTF-8 */
1213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                *pErrorCode = U_INVALID_CHAR_FOUND;
1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return NULL;
1215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    reqLength+=(int32_t)(pDest - (uint8_t *)dest);
1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(pNumSubstitutions!=NULL) {
1222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pNumSubstitutions=numSubstitutions;
1223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(pDestLength){
1226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDestLength = reqLength;
1227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Terminate the buffer */
123050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
123150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return dest;
1232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI char* U_EXPORT2
1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strToUTF8(char *dest,
1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t destCapacity,
1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t *pDestLength,
1238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const UChar *pSrc,
1239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t srcLength,
1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UErrorCode *pErrorCode){
1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return u_strToUTF8WithSub(
1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            dest, destCapacity, pDestLength,
1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pSrc, srcLength,
1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U_SENTINEL, NULL,
1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pErrorCode);
1246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
124750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
124850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI UChar* U_EXPORT2
124950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehou_strFromJavaModifiedUTF8WithSub(
125050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar *dest,
125150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t destCapacity,
125250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t *pDestLength,
125350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const char *src,
125450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t srcLength,
125550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 subchar, int32_t *pNumSubstitutions,
125650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode *pErrorCode) {
125750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar *pDest = dest;
125850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar *pDestLimit = dest+destCapacity;
125950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 ch;
126050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t reqLength = 0;
126150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const uint8_t* pSrc = (const uint8_t*) src;
126250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const uint8_t *pSrcLimit;
126350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t count;
126450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint8_t t1, t2; /* trail bytes */
126550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t numSubstitutions;
126650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
126750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* args check */
126850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(*pErrorCode)){
126950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return NULL;
127050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
127150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
127250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        (dest==NULL && destCapacity!=0) || destCapacity<0 ||
127350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        subchar > 0x10ffff || U_IS_SURROGATE(subchar)
127450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ) {
127550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
127650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return NULL;
127750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
127850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
127950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(pNumSubstitutions!=NULL) {
128050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        *pNumSubstitutions=0;
128150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
128250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    numSubstitutions=0;
128350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
128450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(srcLength < 0) {
128550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*
128650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * Transform a NUL-terminated ASCII string.
128750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * Handle non-ASCII strings with slower code.
128850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         */
128950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while(((ch = *pSrc) != 0) && ch <= 0x7f && (pDest < pDestLimit)) {
129050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            *pDest++=(UChar)ch;
129150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ++pSrc;
129250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
129350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(ch == 0) {
129450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            reqLength=(int32_t)(pDest - dest);
129550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(pDestLength) {
129650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDestLength = reqLength;
129750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
129850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
129950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            /* Terminate the buffer */
130050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
130150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return dest;
130250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
130350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        srcLength = uprv_strlen((const char *)pSrc);
130450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
130550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
130650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
130750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    pSrcLimit = pSrc + srcLength;
130850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
130950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        count = (int32_t)(pDestLimit - pDest);
131050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        srcLength = (int32_t)(pSrcLimit - pSrc);
131150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(count >= srcLength && srcLength > 0 && *pSrc <= 0x7f) {
131250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            /* fast ASCII loop */
131350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const uint8_t *prevSrc = pSrc;
131450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t delta;
131550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            while(pSrc < pSrcLimit && (ch = *pSrc) <= 0x7f) {
131650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(UChar)ch;
131750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ++pSrc;
131850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
131950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            delta = (int32_t)(pSrc - prevSrc);
132050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            count -= delta;
132150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            srcLength -= delta;
132250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
132350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*
132450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * Each iteration of the inner loop progresses by at most 3 UTF-8
132550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * bytes and one UChar.
132650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         */
132750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        srcLength /= 3;
132850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(count > srcLength) {
132950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            count = srcLength; /* min(remaining dest, remaining src/3) */
133050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
133150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(count < 3) {
133250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            /*
133350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             * Too much overhead if we get near the end of the string,
133450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             * continue with the next loop.
133550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             */
133650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
133750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
133850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        do {
133950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ch = *pSrc;
134050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(ch <= 0x7f){
134150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(UChar)ch;
134250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ++pSrc;
134350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
134450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(ch >= 0xe0) {
134550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if( /* handle U+0000..U+FFFF inline */
134650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        ch <= 0xef &&
134750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
134850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
134950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ) {
135050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
135150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
135250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        pSrc += 3;
135350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        continue;
135450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
135550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
135650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if( /* handle U+0000..U+07FF inline */
135750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        ch >= 0xc0 &&
135850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
135950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ) {
136050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
136150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        pSrc += 2;
136250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        continue;
136350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
136450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
136550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
136650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(subchar < 0) {
136750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *pErrorCode = U_INVALID_CHAR_FOUND;
136850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return NULL;
136950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else if(subchar > 0xffff && --count == 0) {
137050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    /*
137150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     * We need to write two UChars, adjusted count for that,
137250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     * and ran out of space.
137350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     */
137450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
137550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
137650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    /* function call for error cases */
137750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ++pSrc; /* continue after the lead byte */
137850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
137950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ++numSubstitutions;
138050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(subchar<=0xFFFF) {
138150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *(pDest++)=(UChar)subchar;
138250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else {
138350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *(pDest++)=U16_LEAD(subchar);
138450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *(pDest++)=U16_TRAIL(subchar);
138550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
138650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
138750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
138850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } while(--count > 0);
138950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
139050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
139150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while((pSrc<pSrcLimit) && (pDest<pDestLimit)) {
139250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ch = *pSrc;
139350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(ch <= 0x7f){
139450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            *pDest++=(UChar)ch;
139550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ++pSrc;
139650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
139750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(ch >= 0xe0) {
139850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if( /* handle U+0000..U+FFFF inline */
139950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ch <= 0xef &&
140050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ((pSrcLimit - pSrc) >= 3) &&
140150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
140250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
140350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ) {
140450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
140550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
140650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    pSrc += 3;
140750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    continue;
140850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
140950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
141050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if( /* handle U+0000..U+07FF inline */
141150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ch >= 0xc0 &&
141250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ((pSrcLimit - pSrc) >= 2) &&
141350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
141450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ) {
141550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
141650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    pSrc += 2;
141750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    continue;
141850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
141950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
142050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
142150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(subchar < 0) {
142250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pErrorCode = U_INVALID_CHAR_FOUND;
142350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return NULL;
142450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
142550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                /* function call for error cases */
142650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ++pSrc; /* continue after the lead byte */
142750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
142850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ++numSubstitutions;
142950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(subchar<=0xFFFF) {
143050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *(pDest++)=(UChar)subchar;
143150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
143250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *(pDest++)=U16_LEAD(subchar);
143350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(pDest<pDestLimit) {
143450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *(pDest++)=U16_TRAIL(subchar);
143550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else {
143650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        reqLength++;
143750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
143850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
143950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
144050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
144150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
144250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
144350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
144450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* do not fill the dest buffer just count the UChars needed */
144550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while(pSrc < pSrcLimit){
144650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ch = *pSrc;
144750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(ch <= 0x7f) {
144850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            reqLength++;
144950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ++pSrc;
145050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
145150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(ch >= 0xe0) {
145250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if( /* handle U+0000..U+FFFF inline */
145350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ch <= 0xef &&
145450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ((pSrcLimit - pSrc) >= 3) &&
145550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    (uint8_t)(pSrc[1] - 0x80) <= 0x3f &&
145650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    (uint8_t)(pSrc[2] - 0x80) <= 0x3f
145750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ) {
145850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    reqLength++;
145950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    pSrc += 3;
146050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    continue;
146150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
146250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
146350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if( /* handle U+0000..U+07FF inline */
146450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ch >= 0xc0 &&
146550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ((pSrcLimit - pSrc) >= 2) &&
146650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    (uint8_t)(pSrc[1] - 0x80) <= 0x3f
146750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ) {
146850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    reqLength++;
146950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    pSrc += 2;
147050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    continue;
147150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
147250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
147350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
147450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(subchar < 0) {
147550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pErrorCode = U_INVALID_CHAR_FOUND;
147650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return NULL;
147750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
147850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                /* function call for error cases */
147950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ++pSrc; /* continue after the lead byte */
148050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
148150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ++numSubstitutions;
148250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                reqLength+=U16_LENGTH(ch);
148350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
148450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
148550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
148650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
148750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(pNumSubstitutions!=NULL) {
148850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        *pNumSubstitutions=numSubstitutions;
148950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
149050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
149150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reqLength+=(int32_t)(pDest - dest);
149250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(pDestLength) {
149350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        *pDestLength = reqLength;
149450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
149550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
149650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* Terminate the buffer */
149750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
149850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return dest;
149950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
150050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
150150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI char* U_EXPORT2
150250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehou_strToJavaModifiedUTF8(
150350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        char *dest,
150450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t destCapacity,
150550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t *pDestLength,
150650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UChar *src,
150750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t srcLength,
150850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode *pErrorCode) {
150950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t reqLength=0;
151027f654740f2a26ad62a5c155af9199af9e69b889claireho    uint32_t ch=0;
151150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint8_t *pDest = (uint8_t *)dest;
151250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint8_t *pDestLimit = pDest + destCapacity;
151350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *pSrcLimit;
151450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t count;
151550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
151650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* args check */
151750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(*pErrorCode)){
151850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return NULL;
151950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
152050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
152150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        (dest==NULL && destCapacity!=0) || destCapacity<0
152250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ) {
152350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
152450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return NULL;
152550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
152650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
152750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(srcLength==-1) {
152850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /* Convert NUL-terminated ASCII, then find the string length. */
152950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while((ch=*src)<=0x7f && ch != 0 && pDest<pDestLimit) {
153050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            *pDest++ = (uint8_t)ch;
153150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ++src;
153250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
153350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(ch == 0) {
153450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            reqLength=(int32_t)(pDest - (uint8_t *)dest);
153550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(pDestLength) {
153650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDestLength = reqLength;
153750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
153850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
153950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            /* Terminate the buffer */
154050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
154150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return dest;
154250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
154350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        srcLength = u_strlen(src);
154450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
154550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
154650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
154750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    pSrcLimit = src+srcLength;
154850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
154950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        count = (int32_t)(pDestLimit - pDest);
155050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        srcLength = (int32_t)(pSrcLimit - src);
155150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(count >= srcLength && srcLength > 0 && *src <= 0x7f) {
155250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            /* fast ASCII loop */
155350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const UChar *prevSrc = src;
155450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t delta;
155550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            while(src < pSrcLimit && (ch = *src) <= 0x7f && ch != 0) {
155650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(uint8_t)ch;
155750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ++src;
155850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
155950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            delta = (int32_t)(src - prevSrc);
156050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            count -= delta;
156150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            srcLength -= delta;
156250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
156350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*
156450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * Each iteration of the inner loop progresses by at most 3 UTF-8
156550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * bytes and one UChar.
156650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         */
156750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        count /= 3;
156850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(count > srcLength) {
156950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            count = srcLength; /* min(remaining dest/3, remaining src) */
157050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
157150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(count < 3) {
157250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            /*
157350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             * Too much overhead if we get near the end of the string,
157450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             * continue with the next loop.
157550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             */
157650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
157750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
157850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        do {
157950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ch=*src++;
158050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(ch <= 0x7f && ch != 0) {
158150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++ = (uint8_t)ch;
158250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if(ch <= 0x7ff) {
158350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(uint8_t)((ch>>6)|0xc0);
158450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(uint8_t)((ch&0x3f)|0x80);
158550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
158650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(uint8_t)((ch>>12)|0xe0);
158750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
158850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(uint8_t)((ch&0x3f)|0x80);
158950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
159050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } while(--count > 0);
159150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
159250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
159350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while(src<pSrcLimit) {
159450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ch=*src++;
159550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(ch <= 0x7f && ch != 0) {
159650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(pDest<pDestLimit) {
159750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++ = (uint8_t)ch;
159850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
159950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                reqLength = 1;
160050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
160150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
160250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(ch <= 0x7ff) {
160350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if((pDestLimit - pDest) >= 2) {
160450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(uint8_t)((ch>>6)|0xc0);
160550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(uint8_t)((ch&0x3f)|0x80);
160650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
160750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                reqLength = 2;
160850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
160950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
161050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
161150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if((pDestLimit - pDest) >= 3) {
161250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(uint8_t)((ch>>12)|0xe0);
161350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
161450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(uint8_t)((ch&0x3f)|0x80);
161550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
161650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                reqLength = 3;
161750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
161850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
161950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
162050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
162150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while(src<pSrcLimit) {
162250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ch=*src++;
162350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(ch <= 0x7f && ch != 0) {
162450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ++reqLength;
162550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(ch<=0x7ff) {
162650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            reqLength+=2;
162750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
162850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            reqLength+=3;
162950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
163050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
163150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
163250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reqLength+=(int32_t)(pDest - (uint8_t *)dest);
163350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(pDestLength){
163450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        *pDestLength = reqLength;
163550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
163650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
163750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* Terminate the buffer */
163850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
163950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return dest;
164050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
1641