/*
******************************************************************************
*
*   Copyright (C) 2001-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*
* File ustrtrns.cpp
*
* Modification History:
*
*   Date        Name        Description
*   9/10/2001    Ram    Creation.
******************************************************************************
*/
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/*******************************************************************************
 *
 * u_strTo* and u_strFrom* APIs
 * WCS functions moved to ustr_wcs.c for better modularization
 *
 *******************************************************************************
 */
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/putil.h"
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h"
29103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf.h"
30103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf8.h"
31103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf16.h"
32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h"
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h"
34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ustr_imp.h"
35103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "uassert.h"
36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/*
 * Number of elements in a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so the macro behaves as a single
 * primary expression wherever it is used.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
3859d709d503bab6e2b61931737e662dd293b40578ccornelius
39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar* U_EXPORT2
40b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruu_strFromUTF32WithSub(UChar *dest,
41b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru               int32_t destCapacity,
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru               int32_t *pDestLength,
43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru               const UChar32 *src,
44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru               int32_t srcLength,
45b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru               UChar32 subchar, int32_t *pNumSubstitutions,
46b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru               UErrorCode *pErrorCode) {
47b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    const UChar32 *srcLimit;
48b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UChar32 ch;
49b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UChar *destLimit;
50b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UChar *pDest;
51b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    int32_t reqLength;
52b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    int32_t numSubstitutions;
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* args check */
55b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if(U_FAILURE(*pErrorCode)){
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
5850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
5950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
60b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        subchar > 0x10ffff || U_IS_SURROGATE(subchar)
61b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    ) {
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
66b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if(pNumSubstitutions != NULL) {
67b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        *pNumSubstitutions = 0;
68b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
69b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
70b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    pDest = dest;
71103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    destLimit = (dest!=NULL)?(dest + destCapacity):NULL;
72b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    reqLength = 0;
73b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    numSubstitutions = 0;
74b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
75b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if(srcLength < 0) {
76b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        /* simple loop for conversion of a NUL-terminated BMP string */
77b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        while((ch=*src) != 0 &&
78b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru              ((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff))) {
79b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            ++src;
80b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            if(pDest < destLimit) {
81b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                *pDest++ = (UChar)ch;
82b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            } else {
83b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                ++reqLength;
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
86b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        srcLimit = src;
87b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        if(ch != 0) {
88b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            /* "complicated" case, find the end of the remaining string */
89b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            while(*++srcLimit != 0) {}
90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
91b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    } else {
92103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius      srcLimit = (src!=NULL)?(src + srcLength):NULL;
93b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
94b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
95b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    /* convert with length */
96b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    while(src < srcLimit) {
97b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        ch = *src++;
98b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        do {
99b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            /* usually "loops" once; twice only for writing subchar */
100b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            if((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff)) {
101b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                if(pDest < destLimit) {
102b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    *pDest++ = (UChar)ch;
103b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                } else {
104b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    ++reqLength;
105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
106b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                break;
107b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            } else if(0x10000 <= ch && ch <= 0x10ffff) {
108103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                if(pDest!=NULL && ((pDest + 2) <= destLimit)) {
109b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    *pDest++ = U16_LEAD(ch);
110b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    *pDest++ = U16_TRAIL(ch);
111b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                } else {
112b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                    reqLength += 2;
113b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                }
114b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                break;
115b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            } else if((ch = subchar) < 0) {
116b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                /* surrogate code point, or not a Unicode code point at all */
117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                *pErrorCode = U_INVALID_CHAR_FOUND;
118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return NULL;
119b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            } else {
120b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                ++numSubstitutions;
121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
122b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        } while(TRUE);
123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    reqLength += (int32_t)(pDest - dest);
126b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if(pDestLength) {
127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDestLength = reqLength;
128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
129b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if(pNumSubstitutions != NULL) {
130b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        *pNumSubstitutions = numSubstitutions;
131b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Terminate the buffer */
134b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return dest;
137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
139b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI UChar* U_EXPORT2
140b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruu_strFromUTF32(UChar *dest,
141b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru               int32_t destCapacity,
142b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru               int32_t *pDestLength,
143b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru               const UChar32 *src,
144b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru               int32_t srcLength,
145b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru               UErrorCode *pErrorCode) {
146b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    return u_strFromUTF32WithSub(
147b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            dest, destCapacity, pDestLength,
148b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            src, srcLength,
149b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            U_SENTINEL, NULL,
150b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            pErrorCode);
151b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru}
152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar32* U_EXPORT2
154b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruu_strToUTF32WithSub(UChar32 *dest,
155b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             int32_t destCapacity,
156b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             int32_t *pDestLength,
157b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             const UChar *src,
158b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             int32_t srcLength,
159b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             UChar32 subchar, int32_t *pNumSubstitutions,
160b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             UErrorCode *pErrorCode) {
161b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    const UChar *srcLimit;
162b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UChar32 ch;
163b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UChar ch2;
164b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UChar32 *destLimit;
165b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    UChar32 *pDest;
166b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    int32_t reqLength;
167b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    int32_t numSubstitutions;
168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* args check */
170b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if(U_FAILURE(*pErrorCode)){
171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
17350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
17450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
175b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        subchar > 0x10ffff || U_IS_SURROGATE(subchar)
176b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    ) {
177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
181b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if(pNumSubstitutions != NULL) {
182b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        *pNumSubstitutions = 0;
183b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
184b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
185b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    pDest = dest;
186103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    destLimit = (dest!=NULL)?(dest + destCapacity):NULL;
187b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    reqLength = 0;
188b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    numSubstitutions = 0;
189b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
190b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if(srcLength < 0) {
191b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        /* simple loop for conversion of a NUL-terminated BMP string */
192b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        while((ch=*src) != 0 && !U16_IS_SURROGATE(ch)) {
193b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            ++src;
194b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            if(pDest < destLimit) {
195b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                *pDest++ = ch;
196b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            } else {
197b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru                ++reqLength;
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
200b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        srcLimit = src;
201b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        if(ch != 0) {
202b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            /* "complicated" case, find the end of the remaining string */
203b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            while(*++srcLimit != 0) {}
204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
206103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        srcLimit = (src!=NULL)?(src + srcLength):NULL;
207b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
208b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
209b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    /* convert with length */
210b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    while(src < srcLimit) {
211b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        ch = *src++;
212b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        if(!U16_IS_SURROGATE(ch)) {
213b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            /* write or count ch below */
214b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        } else if(U16_IS_SURROGATE_LEAD(ch) && src < srcLimit && U16_IS_TRAIL(ch2 = *src)) {
215b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            ++src;
216b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            ch = U16_GET_SUPPLEMENTARY(ch, ch2);
217b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        } else if((ch = subchar) < 0) {
218b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            /* unpaired surrogate */
219b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            *pErrorCode = U_INVALID_CHAR_FOUND;
220b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            return NULL;
221b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        } else {
222b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            ++numSubstitutions;
223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
224b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        if(pDest < destLimit) {
225b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            *pDest++ = ch;
226b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        } else {
227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++reqLength;
228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
231b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    reqLength += (int32_t)(pDest - dest);
232b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if(pDestLength) {
233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDestLength = reqLength;
234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
235b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if(pNumSubstitutions != NULL) {
236b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        *pNumSubstitutions = numSubstitutions;
237b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Terminate the buffer */
240b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    u_terminateUChar32s(dest, destCapacity, reqLength, pErrorCode);
241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return dest;
243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
245b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI UChar32* U_EXPORT2
246b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruu_strToUTF32(UChar32 *dest,
247b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             int32_t destCapacity,
248b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             int32_t *pDestLength,
249b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             const UChar *src,
250b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             int32_t srcLength,
251b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru             UErrorCode *pErrorCode) {
252b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    return u_strToUTF32WithSub(
253b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            dest, destCapacity, pDestLength,
254b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            src, srcLength,
255b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            U_SENTINEL, NULL,
256b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru            pErrorCode);
257b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru}
258b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru
259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* for utf8_nextCharSafeBodyTerminated() */
260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar32
261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruutf8_minLegal[4]={ 0, 0x80, 0x800, 0x10000 };
262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Version of utf8_nextCharSafeBody() with the following differences:
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - checks for NUL termination instead of length
266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - works with pointers instead of indexes
267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - always strict (strict==-1)
268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * *ps points to after the lead byte and will be moved to after the last trail byte.
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * c is the lead byte.
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the code point, or U_SENTINEL
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UChar32
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruutf8_nextCharSafeBodyTerminated(const uint8_t **ps, UChar32 c) {
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const uint8_t *s=*ps;
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t trail, illegal=0;
277103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    uint8_t count=U8_COUNT_TRAIL_BYTES(c);
278103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    U_ASSERT(count<6);
279103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    U8_MASK_LEAD_BYTE((c), count);
280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */
281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    switch(count) {
282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* each branch falls through to the next one */
283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 5:
284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 4:
285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* count>=4 is always illegal: no more than 3 trail bytes in Unicode's UTF-8 */
286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        illegal=1;
287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 3:
289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        trail=(uint8_t)(*s++ - 0x80);
290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        c=(c<<6)|trail;
291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(trail>0x3f || c>=0x110) {
292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* not a trail byte, or code point>0x10ffff (outside Unicode) */
293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            illegal=1;
294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
296103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    case 2: /*fall through*/
297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        trail=(uint8_t)(*s++ - 0x80);
298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(trail>0x3f) {
299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* not a trail byte */
300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            illegal=1;
301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        c=(c<<6)|trail;
304103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    case 1: /*fall through*/
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        trail=(uint8_t)(*s++ - 0x80);
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(trail>0x3f) {
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* not a trail byte */
308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            illegal=1;
309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        c=(c<<6)|trail;
311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        break;
312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    case 0:
313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return U_SENTINEL;
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* no default branch to optimize switch()  - all values are covered */
315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* correct sequence - all trail bytes have (b7..b6)==(10)? */
318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* illegal is also set if count>=4 */
319103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(illegal || c<utf8_minLegal[count] || U_IS_SURROGATE(c)) {
320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* error handling */
321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* don't go beyond this sequence */
322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        s=*ps;
323103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        while(count>0 && U8_IS_TRAIL(*s)) {
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++s;
325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            --count;
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        c=U_SENTINEL;
328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *ps=s;
330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return c;
331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Version of utf8_nextCharSafeBody() with the following differences:
335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - works with pointers instead of indexes
336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - always strict (strict==-1)
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * *ps points to after the lead byte and will be moved to after the last trail byte.
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * c is the lead byte.
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the code point, or U_SENTINEL
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UChar32
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruutf8_nextCharSafeBodyPointer(const uint8_t **ps, const uint8_t *limit, UChar32 c) {
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const uint8_t *s=*ps;
345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t trail, illegal=0;
346103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    uint8_t count=U8_COUNT_TRAIL_BYTES(c);
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if((limit-s)>=count) {
348103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        U8_MASK_LEAD_BYTE((c), count);
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        switch(count) {
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* each branch falls through to the next one */
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 5:
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 4:
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* count>=4 is always illegal: no more than 3 trail bytes in Unicode's UTF-8 */
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            illegal=1;
356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 3:
358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            trail=*s++;
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=(c<<6)|(trail&0x3f);
360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(c<0x110) {
361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                illegal|=(trail&0xc0)^0x80;
362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* code point>0x10ffff, outside Unicode */
364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                illegal=1;
365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
367103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case 2: /*fall through*/
368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            trail=*s++;
369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=(c<<6)|(trail&0x3f);
370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            illegal|=(trail&0xc0)^0x80;
371103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        case 1: /*fall through*/
372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            trail=*s++;
373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            c=(c<<6)|(trail&0x3f);
374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            illegal|=(trail&0xc0)^0x80;
375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 0:
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return U_SENTINEL;
378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* no default branch to optimize switch()  - all values are covered */
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        illegal=1; /* too few bytes left */
382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* correct sequence - all trail bytes have (b7..b6)==(10)? */
385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* illegal is also set if count>=4 */
38659d709d503bab6e2b61931737e662dd293b40578ccornelius    U_ASSERT(illegal || count<LENGTHOF(utf8_minLegal));
387103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if(illegal || c<utf8_minLegal[count] || U_IS_SURROGATE(c)) {
388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* error handling */
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* don't go beyond this sequence */
390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        s=*ps;
391103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        while(count>0 && s<limit && U8_IS_TRAIL(*s)) {
392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++s;
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            --count;
394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        c=U_SENTINEL;
396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *ps=s;
398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return c;
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar* U_EXPORT2
402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strFromUTF8WithSub(UChar *dest,
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              int32_t destCapacity,
404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              int32_t *pDestLength,
405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              const char* src,
406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              int32_t srcLength,
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              UChar32 subchar, int32_t *pNumSubstitutions,
408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              UErrorCode *pErrorCode){
409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar *pDest = dest;
410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar *pDestLimit = dest+destCapacity;
411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 ch;
412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t reqLength = 0;
413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const uint8_t* pSrc = (const uint8_t*) src;
414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t t1, t2; /* trail bytes */
415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t numSubstitutions;
416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* args check */
418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
42250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
42350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        subchar > 0x10ffff || U_IS_SURROGATE(subchar)
425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ) {
426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
430b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if(pNumSubstitutions!=NULL) {
431b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        *pNumSubstitutions=0;
432b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    numSubstitutions=0;
434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /*
436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Inline processing of UTF-8 byte sequences:
437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     *
438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Byte sequences for the most common characters are handled inline in
439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * the conversion loops. In order to reduce the path lengths for those
440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * characters, the tests are arranged in a kind of binary search.
441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * ASCII (<=0x7f) is checked first, followed by the dividing point
442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * between 2- and 3-byte sequences (0xe0).
443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * The 3-byte branch is tested first to speed up CJK text.
444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * The compiler should combine the subtractions for the two tests for 0xe0.
445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     * Each branch then tests for the other end of its range.
446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru     */
447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(srcLength < 0){
449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * Transform a NUL-terminated string.
451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * The code explicitly checks for NULs only in the lead byte position.
452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * A NUL byte in the trail byte position fails the trail byte range check anyway.
453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         */
454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while(((ch = *pSrc) != 0) && (pDest < pDestLimit)) {
455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(ch <= 0x7f){
456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                *pDest++=(UChar)ch;
457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pSrc;
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(ch > 0xe0) {
460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if( /* handle U+1000..U+CFFF inline */
461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ch <= 0xec &&
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ) {
465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        pSrc += 3;
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        continue;
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if(ch < 0xe0) {
471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if( /* handle U+0080..U+07FF inline */
472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ch >= 0xc2 &&
473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ) {
475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        pSrc += 2;
477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        continue;
478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* function call for "complicated" and error cases */
482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pSrc; /* continue after the lead byte */
483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ch=utf8_nextCharSafeBodyTerminated(&pSrc, ch);
484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(ch<0 && (++numSubstitutions, ch = subchar) < 0) {
485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pErrorCode = U_INVALID_CHAR_FOUND;
486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return NULL;
487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if(ch<=0xFFFF) {
488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *(pDest++)=(UChar)ch;
489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
490103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    *(pDest++)=U16_LEAD(ch);
491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(pDest<pDestLimit) {
492103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        *(pDest++)=U16_TRAIL(ch);
493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else {
494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        reqLength++;
495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break;
496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Pre-flight the rest of the string. */
502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while((ch = *pSrc) != 0) {
503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(ch <= 0x7f){
504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++reqLength;
505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pSrc;
506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(ch > 0xe0) {
508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if( /* handle U+1000..U+CFFF inline */
509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ch <= 0xec &&
510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (uint8_t)(pSrc[1] - 0x80) <= 0x3f &&
511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (uint8_t)(pSrc[2] - 0x80) <= 0x3f
512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ) {
513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ++reqLength;
514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        pSrc += 3;
515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        continue;
516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if(ch < 0xe0) {
518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if( /* handle U+0080..U+07FF inline */
519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ch >= 0xc2 &&
520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (uint8_t)(pSrc[1] - 0x80) <= 0x3f
521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ) {
522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ++reqLength;
523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        pSrc += 2;
524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        continue;
525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* function call for "complicated" and error cases */
529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pSrc; /* continue after the lead byte */
530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ch=utf8_nextCharSafeBodyTerminated(&pSrc, ch);
531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(ch<0 && (++numSubstitutions, ch = subchar) < 0) {
532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pErrorCode = U_INVALID_CHAR_FOUND;
533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return NULL;
534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                reqLength += U16_LENGTH(ch);
536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else /* srcLength >= 0 */ {
539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const uint8_t *pSrcLimit = pSrc + srcLength;
540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t count;
541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(;;) {
544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /*
545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             * Each iteration of the inner loop progresses by at most 3 UTF-8
546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             * bytes and one UChar, for most characters.
547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             * For supplementary code points (4 & 2), which are rare,
548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             * there is an additional adjustment.
549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             */
550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count = (int32_t)(pDestLimit - pDest);
551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            srcLength = (int32_t)((pSrcLimit - pSrc) / 3);
552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(count > srcLength) {
553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                count = srcLength; /* min(remaining dest, remaining src/3) */
554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(count < 3) {
556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /*
557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * Too much overhead if we get near the end of the string,
558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * continue with the next loop.
559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 */
560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            do {
564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ch = *pSrc;
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(ch <= 0x7f){
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(UChar)ch;
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ++pSrc;
568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(ch > 0xe0) {
570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if( /* handle U+1000..U+CFFF inline */
571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            ch <= 0xec &&
572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ) {
575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            pSrc += 3;
578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            continue;
579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else if(ch < 0xe0) {
581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if( /* handle U+0080..U+07FF inline */
582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            ch >= 0xc2 &&
583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ) {
585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            pSrc += 2;
587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            continue;
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(ch >= 0xf0 || subchar > 0xffff) {
592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        /*
593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         * We may read up to six bytes and write up to two UChars,
594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         * which we didn't account for with computing count,
595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         * so we adjust it here.
596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         */
597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if(--count == 0) {
598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            break;
599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* function call for "complicated" and error cases */
603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ++pSrc; /* continue after the lead byte */
604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch=utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(ch<0 && (++numSubstitutions, ch = subchar) < 0){
606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *pErrorCode = U_INVALID_CHAR_FOUND;
607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        return NULL;
608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }else if(ch<=0xFFFF){
609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *(pDest++)=(UChar)ch;
610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }else{
611103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        *(pDest++)=U16_LEAD(ch);
612103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        *(pDest++)=U16_TRAIL(ch);
613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } while(--count > 0);
616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while((pSrc<pSrcLimit) && (pDest<pDestLimit)) {
619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ch = *pSrc;
620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(ch <= 0x7f){
621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                *pDest++=(UChar)ch;
622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pSrc;
623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(ch > 0xe0) {
625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if( /* handle U+1000..U+CFFF inline */
626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ch <= 0xec &&
627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ((pSrcLimit - pSrc) >= 3) &&
628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ) {
631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        pSrc += 3;
634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        continue;
635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if(ch < 0xe0) {
637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if( /* handle U+0080..U+07FF inline */
638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ch >= 0xc2 &&
639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ((pSrcLimit - pSrc) >= 2) &&
640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ) {
642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        pSrc += 2;
644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        continue;
645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* function call for "complicated" and error cases */
649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pSrc; /* continue after the lead byte */
650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ch=utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(ch<0 && (++numSubstitutions, ch = subchar) < 0){
652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pErrorCode = U_INVALID_CHAR_FOUND;
653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return NULL;
654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }else if(ch<=0xFFFF){
655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *(pDest++)=(UChar)ch;
656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }else{
657103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    *(pDest++)=U16_LEAD(ch);
658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(pDest<pDestLimit){
659103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        *(pDest++)=U16_TRAIL(ch);
660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }else{
661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        reqLength++;
662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break;
663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
66750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /* do not fill the dest buffer just count the UChars needed */
668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while(pSrc < pSrcLimit){
669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ch = *pSrc;
670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(ch <= 0x7f){
671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                reqLength++;
672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pSrc;
673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(ch > 0xe0) {
675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if( /* handle U+1000..U+CFFF inline */
676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ch <= 0xec &&
677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ((pSrcLimit - pSrc) >= 3) &&
678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (uint8_t)(pSrc[1] - 0x80) <= 0x3f &&
679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (uint8_t)(pSrc[2] - 0x80) <= 0x3f
680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ) {
681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        reqLength++;
682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        pSrc += 3;
683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        continue;
684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if(ch < 0xe0) {
686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if( /* handle U+0080..U+07FF inline */
687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ch >= 0xc2 &&
688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ((pSrcLimit - pSrc) >= 2) &&
689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (uint8_t)(pSrc[1] - 0x80) <= 0x3f
690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ) {
691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        reqLength++;
692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        pSrc += 2;
693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        continue;
694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* function call for "complicated" and error cases */
698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pSrc; /* continue after the lead byte */
699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ch=utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(ch<0 && (++numSubstitutions, ch = subchar) < 0){
701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pErrorCode = U_INVALID_CHAR_FOUND;
702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return NULL;
703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
704103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                reqLength+=U16_LENGTH(ch);
705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    reqLength+=(int32_t)(pDest - dest);
710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(pNumSubstitutions!=NULL) {
712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pNumSubstitutions=numSubstitutions;
713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(pDestLength){
716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDestLength = reqLength;
717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Terminate the buffer */
720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return dest;
723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar* U_EXPORT2
726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strFromUTF8(UChar *dest,
727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              int32_t destCapacity,
728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              int32_t *pDestLength,
729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              const char* src,
730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              int32_t srcLength,
731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              UErrorCode *pErrorCode){
732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return u_strFromUTF8WithSub(
733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            dest, destCapacity, pDestLength,
734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            src, srcLength,
735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U_SENTINEL, NULL,
736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pErrorCode);
737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar * U_EXPORT2
740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strFromUTF8Lenient(UChar *dest,
741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     int32_t destCapacity,
742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     int32_t *pDestLength,
743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     const char *src,
744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     int32_t srcLength,
745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     UErrorCode *pErrorCode) {
746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar *pDest = dest;
747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar32 ch;
748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t reqLength = 0;
749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t* pSrc = (uint8_t*) src;
750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* args check */
752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
75650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
75750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        (destCapacity<0) || (dest == NULL && destCapacity > 0)
75850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ) {
759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(srcLength < 0) {
764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Transform a NUL-terminated string. */
765103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        UChar *pDestLimit = (dest!=NULL)?(dest+destCapacity):NULL;
766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint8_t t1, t2, t3; /* trail bytes */
767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while(((ch = *pSrc) != 0) && (pDest < pDestLimit)) {
769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(ch < 0xc0) {
770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /*
771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * ASCII, or a trail byte in lead position which is treated like
772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * a single-byte sequence for better character boundary
773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * resynchronization after illegal sequences.
774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 */
775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                *pDest++=(UChar)ch;
776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pSrc;
777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(ch < 0xe0) { /* U+0080..U+07FF */
779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if((t1 = pSrc[1]) != 0) {
780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* 0x3080 = (0xc0 << 6) + 0x80 */
781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++ = (UChar)((ch << 6) + t1 - 0x3080);
782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pSrc += 2;
783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(ch < 0xf0) { /* U+0800..U+FFFF */
786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0) {
787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* 0x2080 = (0x80 << 6) + 0x80 */
789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++ = (UChar)((ch << 12) + (t1 << 6) + t2 - 0x2080);
790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pSrc += 3;
791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else /* f0..f4 */ { /* U+10000..U+10FFFF */
794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0 && (t3 = pSrc[3]) != 0) {
795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pSrc += 4;
796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch = (ch << 18) + (t1 << 12) + (t2 << 6) + t3 - 0x3c82080;
798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *(pDest++) = U16_LEAD(ch);
799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(pDest < pDestLimit) {
800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *(pDest++) = U16_TRAIL(ch);
801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else {
802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        reqLength = 1;
803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break;
804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* truncated character at the end */
810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *pDest++ = 0xfffd;
811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            while(*++pSrc != 0) {}
812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Pre-flight the rest of the string. */
816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while((ch = *pSrc) != 0) {
817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(ch < 0xc0) {
818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /*
819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * ASCII, or a trail byte in lead position which is treated like
820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * a single-byte sequence for better character boundary
821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * resynchronization after illegal sequences.
822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 */
823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++reqLength;
824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pSrc;
825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(ch < 0xe0) { /* U+0080..U+07FF */
827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(pSrc[1] != 0) {
828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ++reqLength;
829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pSrc += 2;
830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(ch < 0xf0) { /* U+0800..U+FFFF */
833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(pSrc[1] != 0 && pSrc[2] != 0) {
834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ++reqLength;
835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pSrc += 3;
836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else /* f0..f4 */ { /* U+10000..U+10FFFF */
839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(pSrc[1] != 0 && pSrc[2] != 0 && pSrc[3] != 0) {
840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    reqLength += 2;
841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pSrc += 4;
842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* truncated character at the end */
847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++reqLength;
848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else /* srcLength >= 0 */ {
851103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius      const uint8_t *pSrcLimit = (pSrc!=NULL)?(pSrc + srcLength):NULL;
852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*
854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * This function requires that if srcLength is given, then it must be
855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * destCapatity >= srcLength so that we need not check for
856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         * destination buffer overflow in the loop.
857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         */
858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(destCapacity < srcLength) {
859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(pDestLength != NULL) {
860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                *pDestLength = srcLength; /* this likely overestimates the true destLength! */
861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return NULL;
864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if((pSrcLimit - pSrc) >= 4) {
867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pSrcLimit -= 3; /* temporarily reduce pSrcLimit */
868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* in this loop, we can always access at least 4 bytes, up to pSrc+3 */
870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            do {
871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ch = *pSrc++;
872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(ch < 0xc0) {
873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /*
874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     * ASCII, or a trail byte in lead position which is treated like
875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     * a single-byte sequence for better character boundary
876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     * resynchronization after illegal sequences.
877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     */
878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(UChar)ch;
879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if(ch < 0xe0) { /* U+0080..U+07FF */
880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* 0x3080 = (0xc0 << 6) + 0x80 */
881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080);
882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if(ch < 0xf0) { /* U+0800..U+FFFF */
883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* 0x2080 = (0x80 << 6) + 0x80 */
885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch = (ch << 12) + (*pSrc++ << 6);
886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++ = (UChar)(ch + *pSrc++ - 0x2080);
887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else /* f0..f4 */ { /* U+10000..U+10FFFF */
888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch = (ch << 18) + (*pSrc++ << 12);
890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch += *pSrc++ << 6;
891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch += *pSrc++ - 0x3c82080;
892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *(pDest++) = U16_LEAD(ch);
893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *(pDest++) = U16_TRAIL(ch);
894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } while(pSrc < pSrcLimit);
896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pSrcLimit += 3; /* restore original pSrcLimit */
898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while(pSrc < pSrcLimit) {
901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ch = *pSrc++;
902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(ch < 0xc0) {
903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /*
904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * ASCII, or a trail byte in lead position which is treated like
905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * a single-byte sequence for better character boundary
906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * resynchronization after illegal sequences.
907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 */
908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                *pDest++=(UChar)ch;
909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(ch < 0xe0) { /* U+0080..U+07FF */
911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(pSrc < pSrcLimit) {
912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* 0x3080 = (0xc0 << 6) + 0x80 */
913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080);
914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(ch < 0xf0) { /* U+0800..U+FFFF */
917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if((pSrcLimit - pSrc) >= 2) {
918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* 0x2080 = (0x80 << 6) + 0x80 */
920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch = (ch << 12) + (*pSrc++ << 6);
921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++ = (UChar)(ch + *pSrc++ - 0x2080);
922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pSrc += 3;
923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else /* f0..f4 */ { /* U+10000..U+10FFFF */
926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if((pSrcLimit - pSrc) >= 3) {
927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch = (ch << 18) + (*pSrc++ << 12);
929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch += *pSrc++ << 6;
930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch += *pSrc++ - 0x3c82080;
931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *(pDest++) = U16_LEAD(ch);
932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *(pDest++) = U16_TRAIL(ch);
933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pSrc += 4;
934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    continue;
935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* truncated character at the end */
939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *pDest++ = 0xfffd;
940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    reqLength+=(int32_t)(pDest - dest);
945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(pDestLength){
947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDestLength = reqLength;
948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Terminate the buffer */
951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return dest;
954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
956103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic inline uint8_t *
957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru_appendUTF8(uint8_t *pDest, UChar32 c) {
958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* it is 0<=c<=0x10ffff and not a surrogate if called by a validating function */
959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if((c)<=0x7f) {
960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDest++=(uint8_t)c;
961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(c<=0x7ff) {
962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDest++=(uint8_t)((c>>6)|0xc0);
963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDest++=(uint8_t)((c&0x3f)|0x80);
964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if(c<=0xffff) {
965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDest++=(uint8_t)((c>>12)|0xe0);
966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDest++=(uint8_t)(((c>>6)&0x3f)|0x80);
967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDest++=(uint8_t)(((c)&0x3f)|0x80);
968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else /* if((uint32_t)(c)<=0x10ffff) */ {
969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDest++=(uint8_t)(((c)>>18)|0xf0);
970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDest++=(uint8_t)((((c)>>12)&0x3f)|0x80);
971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDest++=(uint8_t)((((c)>>6)&0x3f)|0x80);
972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDest++=(uint8_t)(((c)&0x3f)|0x80);
973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return pDest;
975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI char* U_EXPORT2
979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strToUTF8WithSub(char *dest,
980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t destCapacity,
981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t *pDestLength,
982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const UChar *pSrc,
983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t srcLength,
984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar32 subchar, int32_t *pNumSubstitutions,
985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UErrorCode *pErrorCode){
986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t reqLength=0;
987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t ch=0,ch2=0;
988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint8_t *pDest = (uint8_t *)dest;
989103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    uint8_t *pDestLimit = (pDest!=NULL)?(pDest + destCapacity):NULL;
990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t numSubstitutions;
991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* args check */
993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
99750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if( (pSrc==NULL && srcLength!=0) || srcLength < -1 ||
99850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        (destCapacity<0) || (dest == NULL && destCapacity > 0) ||
999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        subchar > 0x10ffff || U_IS_SURROGATE(subchar)
1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ) {
1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1005b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    if(pNumSubstitutions!=NULL) {
1006b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru        *pNumSubstitutions=0;
1007b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    }
1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    numSubstitutions=0;
1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(srcLength==-1) {
1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while((ch=*pSrc)!=0) {
1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++pSrc;
1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(ch <= 0x7f) {
1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(pDest<pDestLimit) {
101550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *pDest++ = (uint8_t)ch;
1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    reqLength = 1;
1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(ch <= 0x7ff) {
1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if((pDestLimit - pDest) >= 2) {
1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)((ch>>6)|0xc0);
1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)((ch&0x3f)|0x80);
1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    reqLength = 2;
1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(ch <= 0xd7ff || ch >= 0xe000) {
1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if((pDestLimit - pDest) >= 3) {
1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)((ch>>12)|0xe0);
1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)((ch&0x3f)|0x80);
1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    reqLength = 3;
1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else /* ch is a surrogate */ {
1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t length;
1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1040103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                /*need not check for NUL because NUL fails U16_IS_TRAIL() anyway*/
1041103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) {
1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ++pSrc;
1043103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    ch=U16_GET_SUPPLEMENTARY(ch, ch2);
1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if(subchar>=0) {
1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch=subchar;
1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ++numSubstitutions;
1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* Unicode 3.2 forbids surrogate code points in UTF-8 */
1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pErrorCode = U_INVALID_CHAR_FOUND;
1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return NULL;
1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                length = U8_LENGTH(ch);
1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if((pDestLimit - pDest) >= length) {
1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* convert and append*/
1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pDest=_appendUTF8(pDest, ch);
1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    reqLength = length;
1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while((ch=*pSrc++)!=0) {
1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(ch<=0x7f) {
1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++reqLength;
1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(ch<=0x7ff) {
1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                reqLength+=2;
1068103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            } else if(!U16_IS_SURROGATE(ch)) {
1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                reqLength+=3;
1070103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            } else if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) {
1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pSrc;
1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                reqLength+=4;
1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(subchar>=0) {
1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                reqLength+=U8_LENGTH(subchar);
1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++numSubstitutions;
1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* Unicode 3.2 forbids surrogate code points in UTF-8 */
1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                *pErrorCode = U_INVALID_CHAR_FOUND;
1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return NULL;
1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1083103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        const UChar *pSrcLimit = (pSrc!=NULL)?(pSrc+srcLength):NULL;
1084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t count;
1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for(;;) {
1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /*
1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             * Each iteration of the inner loop progresses by at most 3 UTF-8
1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             * bytes and one UChar, for most characters.
1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             * For supplementary code points (4 & 2), which are rare,
1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             * there is an additional adjustment.
1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru             */
1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            count = (int32_t)((pDestLimit - pDest) / 3);
1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            srcLength = (int32_t)(pSrcLimit - pSrc);
1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(count > srcLength) {
1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                count = srcLength; /* min(remaining dest/3, remaining src) */
1098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(count < 3) {
1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /*
1101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * Too much overhead if we get near the end of the string,
1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 * continue with the next loop.
1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                 */
1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            do {
1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ch=*pSrc++;
1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(ch <= 0x7f) {
110950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *pDest++ = (uint8_t)ch;
1110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if(ch <= 0x7ff) {
1111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)((ch>>6)|0xc0);
1112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)((ch&0x3f)|0x80);
1113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if(ch <= 0xd7ff || ch >= 0xe000) {
1114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)((ch>>12)|0xe0);
1115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)((ch&0x3f)|0x80);
1117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else /* ch is a surrogate */ {
1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /*
1119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     * We will read two UChars and probably output four bytes,
1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     * which we didn't account for with computing count,
1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     * so we adjust it here.
1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                     */
1123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if(--count == 0) {
1124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        --pSrc; /* undo ch=*pSrc++ for the lead surrogate */
1125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break;  /* recompute count */
1126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1128103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) {
1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ++pSrc;
1130103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                        ch=U16_GET_SUPPLEMENTARY(ch, ch2);
1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        /* writing 4 bytes per 2 UChars is ok */
1133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *pDest++=(uint8_t)((ch>>18)|0xf0);
1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *pDest++=(uint8_t)(((ch>>12)&0x3f)|0x80);
1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        *pDest++=(uint8_t)((ch&0x3f)|0x80);
1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else  {
1138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        /* Unicode 3.2 forbids surrogate code points in UTF-8 */
1139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if(subchar>=0) {
1140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            ch=subchar;
1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            ++numSubstitutions;
1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        } else {
1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            *pErrorCode = U_INVALID_CHAR_FOUND;
1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            return NULL;
1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        /* convert and append*/
1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        pDest=_appendUTF8(pDest, ch);
1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } while(--count > 0);
1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while(pSrc<pSrcLimit) {
1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ch=*pSrc++;
1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(ch <= 0x7f) {
1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if(pDest<pDestLimit) {
115850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *pDest++ = (uint8_t)ch;
1159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    reqLength = 1;
1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(ch <= 0x7ff) {
1164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if((pDestLimit - pDest) >= 2) {
1165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)((ch>>6)|0xc0);
1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)((ch&0x3f)|0x80);
1167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    reqLength = 2;
1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
1170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(ch <= 0xd7ff || ch >= 0xe000) {
1172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if((pDestLimit - pDest) >= 3) {
1173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)((ch>>12)|0xe0);
1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
1175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pDest++=(uint8_t)((ch&0x3f)|0x80);
1176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    reqLength = 3;
1178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
1179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else /* ch is a surrogate */ {
1181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t length;
1182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1183103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                if(U16_IS_SURROGATE_LEAD(ch) && pSrc<pSrcLimit && U16_IS_TRAIL(ch2=*pSrc)) {
1184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ++pSrc;
1185103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                    ch=U16_GET_SUPPLEMENTARY(ch, ch2);
1186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if(subchar>=0) {
1187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ch=subchar;
1188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ++numSubstitutions;
1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* Unicode 3.2 forbids surrogate code points in UTF-8 */
1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    *pErrorCode = U_INVALID_CHAR_FOUND;
1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return NULL;
1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                length = U8_LENGTH(ch);
1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if((pDestLimit - pDest) >= length) {
1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* convert and append*/
1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pDest=_appendUTF8(pDest, ch);
1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    reqLength = length;
1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    break;
1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while(pSrc<pSrcLimit) {
1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ch=*pSrc++;
1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if(ch<=0x7f) {
1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++reqLength;
1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(ch<=0x7ff) {
1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                reqLength+=2;
1211103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            } else if(!U16_IS_SURROGATE(ch)) {
1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                reqLength+=3;
1213103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            } else if(U16_IS_SURROGATE_LEAD(ch) && pSrc<pSrcLimit && U16_IS_TRAIL(ch2=*pSrc)) {
1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pSrc;
1215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                reqLength+=4;
1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if(subchar>=0) {
1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                reqLength+=U8_LENGTH(subchar);
1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++numSubstitutions;
1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* Unicode 3.2 forbids surrogate code points in UTF-8 */
1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                *pErrorCode = U_INVALID_CHAR_FOUND;
1222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return NULL;
1223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    reqLength+=(int32_t)(pDest - (uint8_t *)dest);
1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(pNumSubstitutions!=NULL) {
1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pNumSubstitutions=numSubstitutions;
1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(pDestLength){
1234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        *pDestLength = reqLength;
1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Terminate the buffer */
123850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
123950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return dest;
1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI char* U_EXPORT2
1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strToUTF8(char *dest,
1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t destCapacity,
1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t *pDestLength,
1246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const UChar *pSrc,
1247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int32_t srcLength,
1248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UErrorCode *pErrorCode){
1249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return u_strToUTF8WithSub(
1250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            dest, destCapacity, pDestLength,
1251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pSrc, srcLength,
1252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U_SENTINEL, NULL,
1253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pErrorCode);
1254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
125550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
125650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI UChar* U_EXPORT2
125750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehou_strFromJavaModifiedUTF8WithSub(
125850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar *dest,
125950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t destCapacity,
126050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t *pDestLength,
126150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const char *src,
126250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t srcLength,
126350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 subchar, int32_t *pNumSubstitutions,
126450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode *pErrorCode) {
126550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar *pDest = dest;
126650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar *pDestLimit = dest+destCapacity;
126750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UChar32 ch;
126850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t reqLength = 0;
126950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const uint8_t* pSrc = (const uint8_t*) src;
127050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const uint8_t *pSrcLimit;
127150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t count;
127250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint8_t t1, t2; /* trail bytes */
127350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t numSubstitutions;
127450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
127550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* args check */
127650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(*pErrorCode)){
127750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return NULL;
127850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
127950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
128050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        (dest==NULL && destCapacity!=0) || destCapacity<0 ||
128150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        subchar > 0x10ffff || U_IS_SURROGATE(subchar)
128250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ) {
128350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
128450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return NULL;
128550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
128650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
128750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(pNumSubstitutions!=NULL) {
128850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        *pNumSubstitutions=0;
128950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
129050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    numSubstitutions=0;
129150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
129250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(srcLength < 0) {
129350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*
129450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * Transform a NUL-terminated ASCII string.
129550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * Handle non-ASCII strings with slower code.
129650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         */
129750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while(((ch = *pSrc) != 0) && ch <= 0x7f && (pDest < pDestLimit)) {
129850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            *pDest++=(UChar)ch;
129950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ++pSrc;
130050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
130150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(ch == 0) {
130250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            reqLength=(int32_t)(pDest - dest);
130350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(pDestLength) {
130450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDestLength = reqLength;
130550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
130650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
130750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            /* Terminate the buffer */
130850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
130950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return dest;
131050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
131150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        srcLength = uprv_strlen((const char *)pSrc);
131250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
131350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
131450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
131554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    pSrcLimit = (pSrc == NULL) ? NULL : pSrc + srcLength;
131650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
131750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        count = (int32_t)(pDestLimit - pDest);
131850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        srcLength = (int32_t)(pSrcLimit - pSrc);
131950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(count >= srcLength && srcLength > 0 && *pSrc <= 0x7f) {
132050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            /* fast ASCII loop */
132150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const uint8_t *prevSrc = pSrc;
132250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t delta;
132350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            while(pSrc < pSrcLimit && (ch = *pSrc) <= 0x7f) {
132450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(UChar)ch;
132550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ++pSrc;
132650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
132750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            delta = (int32_t)(pSrc - prevSrc);
132850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            count -= delta;
132950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            srcLength -= delta;
133050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
133150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*
133250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * Each iteration of the inner loop progresses by at most 3 UTF-8
133350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * bytes and one UChar.
133450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         */
133550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        srcLength /= 3;
133650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(count > srcLength) {
133750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            count = srcLength; /* min(remaining dest, remaining src/3) */
133850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
133950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(count < 3) {
134050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            /*
134150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             * Too much overhead if we get near the end of the string,
134250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             * continue with the next loop.
134350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             */
134450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
134550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
134650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        do {
134750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ch = *pSrc;
134850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(ch <= 0x7f){
134950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(UChar)ch;
135050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ++pSrc;
135150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
135250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(ch >= 0xe0) {
135350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if( /* handle U+0000..U+FFFF inline */
135450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        ch <= 0xef &&
135550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
135650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
135750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ) {
135850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
135950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
136050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        pSrc += 3;
136150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        continue;
136250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
136350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
136450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if( /* handle U+0000..U+07FF inline */
136550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        ch >= 0xc0 &&
136650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
136750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ) {
136850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
136950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        pSrc += 2;
137050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        continue;
137150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
137250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
137350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
137450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(subchar < 0) {
137550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *pErrorCode = U_INVALID_CHAR_FOUND;
137650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    return NULL;
137750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else if(subchar > 0xffff && --count == 0) {
137850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    /*
137950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     * We need to write two UChars, adjusted count for that,
138050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     * and ran out of space.
138150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                     */
138250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    break;
138350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
138450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    /* function call for error cases */
138550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ++pSrc; /* continue after the lead byte */
138650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
138750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ++numSubstitutions;
138850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(subchar<=0xFFFF) {
138950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *(pDest++)=(UChar)subchar;
139050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else {
139150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *(pDest++)=U16_LEAD(subchar);
139250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *(pDest++)=U16_TRAIL(subchar);
139350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
139450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
139550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
139650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } while(--count > 0);
139750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
139850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
139950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while((pSrc<pSrcLimit) && (pDest<pDestLimit)) {
140050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ch = *pSrc;
140150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(ch <= 0x7f){
140250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            *pDest++=(UChar)ch;
140350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ++pSrc;
140450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
140550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(ch >= 0xe0) {
140650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if( /* handle U+0000..U+FFFF inline */
140750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ch <= 0xef &&
140850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ((pSrcLimit - pSrc) >= 3) &&
140950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f &&
141050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f
141150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ) {
141250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */
141350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2);
141450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    pSrc += 3;
141550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    continue;
141650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
141750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
141850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if( /* handle U+0000..U+07FF inline */
141950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ch >= 0xc0 &&
142050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ((pSrcLimit - pSrc) >= 2) &&
142150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f
142250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ) {
142350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1);
142450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    pSrc += 2;
142550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    continue;
142650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
142750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
142850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
142950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(subchar < 0) {
143050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pErrorCode = U_INVALID_CHAR_FOUND;
143150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return NULL;
143250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
143350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                /* function call for error cases */
143450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ++pSrc; /* continue after the lead byte */
143550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
143650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ++numSubstitutions;
143750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if(subchar<=0xFFFF) {
143850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *(pDest++)=(UChar)subchar;
143950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                } else {
144050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    *(pDest++)=U16_LEAD(subchar);
144150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    if(pDest<pDestLimit) {
144250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        *(pDest++)=U16_TRAIL(subchar);
144350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    } else {
144450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        reqLength++;
144550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                        break;
144650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    }
144750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
144850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
144950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
145050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
145150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
145250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* do not fill the dest buffer just count the UChars needed */
145350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while(pSrc < pSrcLimit){
145450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ch = *pSrc;
145550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(ch <= 0x7f) {
145650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            reqLength++;
145750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ++pSrc;
145850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
145950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(ch >= 0xe0) {
146050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if( /* handle U+0000..U+FFFF inline */
146150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ch <= 0xef &&
146250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ((pSrcLimit - pSrc) >= 3) &&
146350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    (uint8_t)(pSrc[1] - 0x80) <= 0x3f &&
146450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    (uint8_t)(pSrc[2] - 0x80) <= 0x3f
146550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ) {
146650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    reqLength++;
146750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    pSrc += 3;
146850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    continue;
146950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
147050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
147150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                if( /* handle U+0000..U+07FF inline */
147250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ch >= 0xc0 &&
147350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    ((pSrcLimit - pSrc) >= 2) &&
147450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    (uint8_t)(pSrc[1] - 0x80) <= 0x3f
147550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ) {
147650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    reqLength++;
147750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    pSrc += 2;
147850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    continue;
147950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                }
148050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
148150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
148250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(subchar < 0) {
148350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pErrorCode = U_INVALID_CHAR_FOUND;
148450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                return NULL;
148550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
148650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                /* function call for error cases */
148750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ++pSrc; /* continue after the lead byte */
148850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch);
148950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ++numSubstitutions;
149050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                reqLength+=U16_LENGTH(ch);
149150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
149250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
149350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
149450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
149550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(pNumSubstitutions!=NULL) {
149650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        *pNumSubstitutions=numSubstitutions;
149750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
149850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
149950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reqLength+=(int32_t)(pDest - dest);
150050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(pDestLength) {
150150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        *pDestLength = reqLength;
150250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
150350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
150450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* Terminate the buffer */
150550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    u_terminateUChars(dest, destCapacity, reqLength, pErrorCode);
150650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return dest;
150750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
150850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
150950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI char* U_EXPORT2
151050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehou_strToJavaModifiedUTF8(
151150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        char *dest,
151250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t destCapacity,
151350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t *pDestLength,
151450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const UChar *src,
151550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        int32_t srcLength,
151650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UErrorCode *pErrorCode) {
151750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t reqLength=0;
151827f654740f2a26ad62a5c155af9199af9e69b889claireho    uint32_t ch=0;
151950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint8_t *pDest = (uint8_t *)dest;
152050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    uint8_t *pDestLimit = pDest + destCapacity;
152150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UChar *pSrcLimit;
152250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    int32_t count;
152350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
152450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* args check */
152550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(*pErrorCode)){
152650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return NULL;
152750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
152850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
152950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        (dest==NULL && destCapacity!=0) || destCapacity<0
153050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    ) {
153150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
153250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return NULL;
153350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
153450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
153550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(srcLength==-1) {
153650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /* Convert NUL-terminated ASCII, then find the string length. */
153750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        while((ch=*src)<=0x7f && ch != 0 && pDest<pDestLimit) {
153850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            *pDest++ = (uint8_t)ch;
153950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ++src;
154050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
154150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(ch == 0) {
154250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            reqLength=(int32_t)(pDest - (uint8_t *)dest);
154350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(pDestLength) {
154450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDestLength = reqLength;
154550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
154650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
154750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            /* Terminate the buffer */
154850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
154950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return dest;
155050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
155150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        srcLength = u_strlen(src);
155250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
155350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
155450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */
1555103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    pSrcLimit = (src!=NULL)?(src+srcLength):NULL;
155650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    for(;;) {
155750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        count = (int32_t)(pDestLimit - pDest);
155850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        srcLength = (int32_t)(pSrcLimit - src);
155950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(count >= srcLength && srcLength > 0 && *src <= 0x7f) {
156050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            /* fast ASCII loop */
156150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            const UChar *prevSrc = src;
156250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            int32_t delta;
156350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            while(src < pSrcLimit && (ch = *src) <= 0x7f && ch != 0) {
156450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(uint8_t)ch;
156550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                ++src;
156650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
156750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            delta = (int32_t)(src - prevSrc);
156850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            count -= delta;
156950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            srcLength -= delta;
157050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
157150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        /*
157250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * Each iteration of the inner loop progresses by at most 3 UTF-8
157350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         * bytes and one UChar.
157450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho         */
157550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        count /= 3;
157650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(count > srcLength) {
157750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            count = srcLength; /* min(remaining dest/3, remaining src) */
157850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
157950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(count < 3) {
158050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            /*
158150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             * Too much overhead if we get near the end of the string,
158250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             * continue with the next loop.
158350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho             */
158450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
158550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
158650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        do {
158750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ch=*src++;
158850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(ch <= 0x7f && ch != 0) {
158950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++ = (uint8_t)ch;
159050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else if(ch <= 0x7ff) {
159150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(uint8_t)((ch>>6)|0xc0);
159250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(uint8_t)((ch&0x3f)|0x80);
159350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
159450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(uint8_t)((ch>>12)|0xe0);
159550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
159650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(uint8_t)((ch&0x3f)|0x80);
159750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
159850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } while(--count > 0);
159950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
160050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
160150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while(src<pSrcLimit) {
160250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ch=*src++;
160350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(ch <= 0x7f && ch != 0) {
160450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(pDest<pDestLimit) {
160550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++ = (uint8_t)ch;
160650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
160750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                reqLength = 1;
160850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
160950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
161050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(ch <= 0x7ff) {
161150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if((pDestLimit - pDest) >= 2) {
161250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(uint8_t)((ch>>6)|0xc0);
161350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(uint8_t)((ch&0x3f)|0x80);
161450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
161550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                reqLength = 2;
161650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
161750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
161850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
161950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if((pDestLimit - pDest) >= 3) {
162050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(uint8_t)((ch>>12)|0xe0);
162150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80);
162250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                *pDest++=(uint8_t)((ch&0x3f)|0x80);
162350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
162450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                reqLength = 3;
162550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                break;
162650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
162750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
162850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
162950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while(src<pSrcLimit) {
163050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        ch=*src++;
163150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(ch <= 0x7f && ch != 0) {
163250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            ++reqLength;
163350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else if(ch<=0x7ff) {
163450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            reqLength+=2;
163550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
163650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            reqLength+=3;
163750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
163850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
163950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
164050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    reqLength+=(int32_t)(pDest - (uint8_t *)dest);
164150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(pDestLength){
164250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        *pDestLength = reqLength;
164350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
164450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
164550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* Terminate the buffer */
164650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    u_terminateChars(dest, destCapacity, reqLength, pErrorCode);
164750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return dest;
164850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
1649