1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru****************************************************************************** 3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 459d709d503bab6e2b61931737e662dd293b40578ccornelius* Copyright (C) 2001-2013, International Business Machines 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru****************************************************************************** 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 9103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius* File ustrtrns.cpp 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Modification History: 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Date Name Description 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 9/10/2001 Ram Creation. 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru****************************************************************************** 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/******************************************************************************* 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * u_strTo* and u_strFrom* APIs 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * WCS functions moved to ustr_wcs.c for better modularization 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ******************************************************************************* 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/putil.h" 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h" 29103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf.h" 30103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf8.h" 31103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf16.h" 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h" 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ustr_imp.h" 35103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "uassert.h" 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3759d709d503bab6e2b61931737e662dd293b40578ccornelius#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 3859d709d503bab6e2b61931737e662dd293b40578ccornelius 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar* U_EXPORT2 40b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruu_strFromUTF32WithSub(UChar *dest, 41b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t destCapacity, 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t *pDestLength, 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar32 *src, 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcLength, 45b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 subchar, int32_t *pNumSubstitutions, 46b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode *pErrorCode) { 47b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UChar32 *srcLimit; 48b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 ch; 49b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar *destLimit; 50b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar *pDest; 51b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t reqLength; 52b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t numSubstitutions; 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* args check */ 55b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(U_FAILURE(*pErrorCode)){ 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( (src==NULL && srcLength!=0) || srcLength < -1 || 5950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (destCapacity<0) || (dest == NULL && destCapacity > 0) || 60b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru subchar > 0x10ffff || U_IS_SURROGATE(subchar) 61b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ) { 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 66b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(pNumSubstitutions != NULL) { 67b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pNumSubstitutions = 0; 68b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 69b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 70b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pDest = dest; 71103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius destLimit = (dest!=NULL)?(dest + destCapacity):NULL; 72b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru reqLength = 0; 73b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru numSubstitutions = 0; 74b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 75b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(srcLength < 0) { 76b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* simple loop for conversion of a NUL-terminated BMP string */ 77b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru while((ch=*src) != 0 && 78b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff))) { 79b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ++src; 80b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(pDest < destLimit) { 81b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pDest++ = (UChar)ch; 82b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 83b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ++reqLength; 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 86b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru srcLimit = src; 87b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(ch != 0) { 88b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* "complicated" case, find the end of the remaining string */ 89b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru while(*++srcLimit != 0) {} 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 91b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 92103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius srcLimit = (src!=NULL)?(src + srcLength):NULL; 93b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 94b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 95b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* convert with length */ 96b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru while(src < srcLimit) { 97b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ch = *src++; 98b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru do { 99b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* usually "loops" once; twice only for writing subchar */ 100b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff)) { 101b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(pDest < destLimit) { 102b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pDest++ = (UChar)ch; 103b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 104b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ++reqLength; 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 106b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 107b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else if(0x10000 <= ch && ch <= 0x10ffff) { 108103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(pDest!=NULL && ((pDest + 2) <= destLimit)) { 109b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pDest++ = U16_LEAD(ch); 110b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pDest++ = U16_TRAIL(ch); 111b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 112b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru reqLength += 2; 113b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 114b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 115b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else if((ch = subchar) < 0) { 116b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* surrogate code point, or not a Unicode code point at all */ 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_INVALID_CHAR_FOUND; 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 119b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 120b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ++numSubstitutions; 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 122b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } while(TRUE); 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength += (int32_t)(pDest - dest); 126b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(pDestLength) { 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDestLength = reqLength; 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 129b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(pNumSubstitutions != NULL) { 130b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pNumSubstitutions = numSubstitutions; 131b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Terminate the buffer */ 134b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru u_terminateUChars(dest, destCapacity, reqLength, pErrorCode); 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return dest; 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 139b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI UChar* U_EXPORT2 140b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruu_strFromUTF32(UChar *dest, 141b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t destCapacity, 142b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t *pDestLength, 143b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UChar32 *src, 144b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t srcLength, 145b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode *pErrorCode) { 146b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return u_strFromUTF32WithSub( 147b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru dest, destCapacity, pDestLength, 148b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru src, srcLength, 149b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U_SENTINEL, NULL, 150b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pErrorCode); 151b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar32* U_EXPORT2 154b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruu_strToUTF32WithSub(UChar32 *dest, 155b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t destCapacity, 156b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t *pDestLength, 157b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UChar *src, 158b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t srcLength, 159b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 subchar, int32_t *pNumSubstitutions, 160b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode *pErrorCode) { 161b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UChar *srcLimit; 162b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 ch; 163b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar ch2; 164b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 *destLimit; 165b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 *pDest; 166b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t reqLength; 167b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t numSubstitutions; 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* args check */ 170b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(U_FAILURE(*pErrorCode)){ 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 17350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( (src==NULL && srcLength!=0) || srcLength < -1 || 17450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (destCapacity<0) || (dest == NULL && destCapacity > 0) || 175b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru subchar > 0x10ffff || U_IS_SURROGATE(subchar) 176b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ) { 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 181b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(pNumSubstitutions != NULL) { 182b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pNumSubstitutions = 0; 183b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 184b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 185b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pDest = dest; 186103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius destLimit = (dest!=NULL)?(dest + destCapacity):NULL; 187b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru reqLength = 0; 188b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru numSubstitutions = 0; 189b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 190b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(srcLength < 0) { 191b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* simple loop for conversion of a NUL-terminated BMP string */ 192b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru while((ch=*src) != 0 && !U16_IS_SURROGATE(ch)) { 193b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ++src; 194b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(pDest < destLimit) { 195b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pDest++ = ch; 196b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 197b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ++reqLength; 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 200b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru srcLimit = src; 201b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(ch != 0) { 202b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* "complicated" case, find the end of the remaining string */ 203b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru while(*++srcLimit != 0) {} 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 206103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius srcLimit = (src!=NULL)?(src + srcLength):NULL; 207b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 208b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 209b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* convert with length */ 210b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru while(src < srcLimit) { 211b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ch = *src++; 212b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(!U16_IS_SURROGATE(ch)) { 213b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* write or count ch below */ 214b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else if(U16_IS_SURROGATE_LEAD(ch) && src < srcLimit && U16_IS_TRAIL(ch2 = *src)) { 215b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ++src; 216b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ch = U16_GET_SUPPLEMENTARY(ch, ch2); 217b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else if((ch = subchar) < 0) { 218b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* unpaired surrogate */ 219b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pErrorCode = U_INVALID_CHAR_FOUND; 220b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 221b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 222b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ++numSubstitutions; 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 224b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(pDest < destLimit) { 225b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pDest++ = ch; 226b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++reqLength; 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 231b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru reqLength += (int32_t)(pDest - dest); 232b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(pDestLength) { 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDestLength = reqLength; 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 235b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(pNumSubstitutions != NULL) { 236b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pNumSubstitutions = numSubstitutions; 237b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Terminate the buffer */ 240b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru u_terminateUChar32s(dest, destCapacity, reqLength, pErrorCode); 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return dest; 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 245b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI UChar32* U_EXPORT2 246b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruu_strToUTF32(UChar32 *dest, 247b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t destCapacity, 248b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t *pDestLength, 249b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UChar *src, 250b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t srcLength, 251b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode *pErrorCode) { 252b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return u_strToUTF32WithSub( 253b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru dest, destCapacity, pDestLength, 254b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru src, srcLength, 255b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U_SENTINEL, NULL, 256b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pErrorCode); 257b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 258b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* for utf8_nextCharSafeBodyTerminated() */ 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar32 261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruutf8_minLegal[4]={ 0, 0x80, 0x800, 0x10000 }; 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Version of utf8_nextCharSafeBody() with the following differences: 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - checks for NUL termination instead of length 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - works with pointers instead of indexes 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - always strict (strict==-1) 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * *ps points to after the lead byte and will be moved to after the last trail byte. 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * c is the lead byte. 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the code point, or U_SENTINEL 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UChar32 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruutf8_nextCharSafeBodyTerminated(const uint8_t **ps, UChar32 c) { 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *s=*ps; 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t trail, illegal=0; 277103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius uint8_t count=U8_COUNT_TRAIL_BYTES(c); 278103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius U_ASSERT(count<6); 279103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius U8_MASK_LEAD_BYTE((c), count); 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */ 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(count) { 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* each branch falls through to the next one */ 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 5: 284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 4: 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* count>=4 is always illegal: no more than 3 trail bytes in Unicode's UTF-8 */ 286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru illegal=1; 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 3: 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru trail=(uint8_t)(*s++ - 0x80); 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=(c<<6)|trail; 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(trail>0x3f || c>=0x110) { 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* not a trail byte, or code point>0x10ffff (outside Unicode) */ 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru illegal=1; 294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 296103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case 2: /*fall through*/ 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru trail=(uint8_t)(*s++ - 0x80); 298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(trail>0x3f) { 299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* not a trail byte */ 300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru illegal=1; 301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=(c<<6)|trail; 304103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case 1: /*fall through*/ 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru trail=(uint8_t)(*s++ - 0x80); 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(trail>0x3f) { 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* not a trail byte */ 308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru illegal=1; 309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=(c<<6)|trail; 311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0: 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return U_SENTINEL; 314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* no default branch to optimize switch() - all values are covered */ 315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* correct sequence - all trail bytes have (b7..b6)==(10)? */ 318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* illegal is also set if count>=4 */ 319103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(illegal || c<utf8_minLegal[count] || U_IS_SURROGATE(c)) { 320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* error handling */ 321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* don't go beyond this sequence */ 322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s=*ps; 323103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius while(count>0 && U8_IS_TRAIL(*s)) { 324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++s; 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --count; 326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=U_SENTINEL; 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *ps=s; 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return c; 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Version of utf8_nextCharSafeBody() with the following differences: 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - works with pointers instead of indexes 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - always strict (strict==-1) 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * *ps points to after the lead byte and will be moved to after the last trail byte. 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * c is the lead byte. 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the code point, or U_SENTINEL 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UChar32 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruutf8_nextCharSafeBodyPointer(const uint8_t **ps, const uint8_t *limit, UChar32 c) { 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *s=*ps; 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t trail, illegal=0; 346103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius uint8_t count=U8_COUNT_TRAIL_BYTES(c); 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((limit-s)>=count) { 348103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius U8_MASK_LEAD_BYTE((c), count); 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */ 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(count) { 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* each branch falls through to the next one */ 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 5: 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 4: 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* count>=4 is always illegal: no more than 3 trail bytes in Unicode's UTF-8 */ 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru illegal=1; 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 3: 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru trail=*s++; 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=(c<<6)|(trail&0x3f); 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c<0x110) { 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru illegal|=(trail&0xc0)^0x80; 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* code point>0x10ffff, outside Unicode */ 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru illegal=1; 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 367103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case 2: /*fall through*/ 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru trail=*s++; 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=(c<<6)|(trail&0x3f); 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru illegal|=(trail&0xc0)^0x80; 371103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius case 1: /*fall through*/ 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru trail=*s++; 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=(c<<6)|(trail&0x3f); 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru illegal|=(trail&0xc0)^0x80; 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0: 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return U_SENTINEL; 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* no default branch to optimize switch() - all values are covered */ 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru illegal=1; /* too few bytes left */ 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* correct sequence - all trail bytes have (b7..b6)==(10)? */ 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* illegal is also set if count>=4 */ 38659d709d503bab6e2b61931737e662dd293b40578ccornelius U_ASSERT(illegal || count<LENGTHOF(utf8_minLegal)); 387103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(illegal || c<utf8_minLegal[count] || U_IS_SURROGATE(c)) { 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* error handling */ 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* don't go beyond this sequence */ 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s=*ps; 391103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius while(count>0 && s<limit && U8_IS_TRAIL(*s)) { 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++s; 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --count; 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=U_SENTINEL; 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *ps=s; 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return c; 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar* U_EXPORT2 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strFromUTF8WithSub(UChar *dest, 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destCapacity, 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t *pDestLength, 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* src, 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcLength, 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 subchar, int32_t *pNumSubstitutions, 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode){ 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *pDest = dest; 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *pDestLimit = dest+destCapacity; 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 ch; 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t reqLength = 0; 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t* pSrc = (const uint8_t*) src; 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t t1, t2; /* trail bytes */ 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t numSubstitutions; 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* args check */ 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 42250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( (src==NULL && srcLength!=0) || srcLength < -1 || 42350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (destCapacity<0) || (dest == NULL && destCapacity > 0) || 424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru subchar > 0x10ffff || U_IS_SURROGATE(subchar) 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 430b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(pNumSubstitutions!=NULL) { 431b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pNumSubstitutions=0; 432b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru numSubstitutions=0; 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Inline processing of UTF-8 byte sequences: 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Byte sequences for the most common characters are handled inline in 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the conversion loops. In order to reduce the path lengths for those 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * characters, the tests are arranged in a kind of binary search. 441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ASCII (<=0x7f) is checked first, followed by the dividing point 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * between 2- and 3-byte sequences (0xe0). 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The 3-byte branch is tested first to speed up CJK text. 444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The compiler should combine the subtractions for the two tests for 0xe0. 445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Each branch then tests for the other end of its range. 446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(srcLength < 0){ 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Transform a NUL-terminated string. 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The code explicitly checks for NULs only in the lead byte position. 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * A NUL byte in the trail byte position fails the trail byte range check anyway. 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(((ch = *pSrc) != 0) && (pDest < pDestLimit)) { 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch <= 0x7f){ 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(UChar)ch; 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch > 0xe0) { 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( /* handle U+1000..U+CFFF inline */ 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch <= 0xec && 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f && 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2); 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 3; 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xe0) { 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( /* handle U+0080..U+07FF inline */ 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch >= 0xc2 && 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1); 476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 2; 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* function call for "complicated" and error cases */ 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; /* continue after the lead byte */ 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch=utf8_nextCharSafeBodyTerminated(&pSrc, ch); 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch<0 && (++numSubstitutions, ch = subchar) < 0) { 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_INVALID_CHAR_FOUND; 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch<=0xFFFF) { 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(pDest++)=(UChar)ch; 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 490103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *(pDest++)=U16_LEAD(ch); 491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pDest<pDestLimit) { 492103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *(pDest++)=U16_TRAIL(ch); 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength++; 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Pre-flight the rest of the string. */ 502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((ch = *pSrc) != 0) { 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch <= 0x7f){ 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++reqLength; 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch > 0xe0) { 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( /* handle U+1000..U+CFFF inline */ 509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch <= 0xec && 510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (uint8_t)(pSrc[1] - 0x80) <= 0x3f && 511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (uint8_t)(pSrc[2] - 0x80) <= 0x3f 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++reqLength; 514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 3; 515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xe0) { 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( /* handle U+0080..U+07FF inline */ 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch >= 0xc2 && 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (uint8_t)(pSrc[1] - 0x80) <= 0x3f 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++reqLength; 523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 2; 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* function call for "complicated" and error cases */ 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; /* continue after the lead byte */ 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch=utf8_nextCharSafeBodyTerminated(&pSrc, ch); 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch<0 && (++numSubstitutions, ch = subchar) < 0) { 532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_INVALID_CHAR_FOUND; 533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength += U16_LENGTH(ch); 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* srcLength >= 0 */ { 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *pSrcLimit = pSrc + srcLength; 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t count; 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */ 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Each iteration of the inner loop progresses by at most 3 UTF-8 546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * bytes and one UChar, for most characters. 547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For supplementary code points (4 & 2), which are rare, 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * there is an additional adjustment. 549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count = (int32_t)(pDestLimit - pDest); 551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcLength = (int32_t)((pSrcLimit - pSrc) / 3); 552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count > srcLength) { 553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count = srcLength; /* min(remaining dest, remaining src/3) */ 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count < 3) { 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Too much overhead if we get near the end of the string, 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * continue with the next loop. 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = *pSrc; 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch <= 0x7f){ 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(UChar)ch; 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch > 0xe0) { 570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( /* handle U+1000..U+CFFF inline */ 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch <= 0xec && 572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f && 573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2); 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 3; 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xe0) { 581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( /* handle U+0080..U+07FF inline */ 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch >= 0xc2 && 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1); 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 2; 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch >= 0xf0 || subchar > 0xffff) { 592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * We may read up to six bytes and write up to two UChars, 594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * which we didn't account for with computing count, 595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * so we adjust it here. 596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(--count == 0) { 598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* function call for "complicated" and error cases */ 603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; /* continue after the lead byte */ 604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch=utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch); 605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch<0 && (++numSubstitutions, ch = subchar) < 0){ 606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_INVALID_CHAR_FOUND; 607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }else if(ch<=0xFFFF){ 609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(pDest++)=(UChar)ch; 610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }else{ 611103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *(pDest++)=U16_LEAD(ch); 612103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *(pDest++)=U16_TRAIL(ch); 613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while(--count > 0); 616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((pSrc<pSrcLimit) && (pDest<pDestLimit)) { 619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = *pSrc; 620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch <= 0x7f){ 621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(UChar)ch; 622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch > 0xe0) { 625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( /* handle U+1000..U+CFFF inline */ 626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch <= 0xec && 627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((pSrcLimit - pSrc) >= 3) && 628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f && 629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f 630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2); 633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 3; 634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xe0) { 637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( /* handle U+0080..U+07FF inline */ 638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch >= 0xc2 && 639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((pSrcLimit - pSrc) >= 2) && 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1); 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 2; 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* function call for "complicated" and error cases */ 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; /* continue after the lead byte */ 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch=utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch); 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch<0 && (++numSubstitutions, ch = subchar) < 0){ 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_INVALID_CHAR_FOUND; 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }else if(ch<=0xFFFF){ 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(pDest++)=(UChar)ch; 656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }else{ 657103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *(pDest++)=U16_LEAD(ch); 658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pDest<pDestLimit){ 659103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *(pDest++)=U16_TRAIL(ch); 660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }else{ 661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength++; 662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 66750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* do not fill the dest buffer just count the UChars needed */ 668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(pSrc < pSrcLimit){ 669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = *pSrc; 670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch <= 0x7f){ 671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength++; 672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch > 0xe0) { 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( /* handle U+1000..U+CFFF inline */ 676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch <= 0xec && 677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((pSrcLimit - pSrc) >= 3) && 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (uint8_t)(pSrc[1] - 0x80) <= 0x3f && 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (uint8_t)(pSrc[2] - 0x80) <= 0x3f 680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength++; 682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 3; 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xe0) { 686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( /* handle U+0080..U+07FF inline */ 687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch >= 0xc2 && 688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((pSrcLimit - pSrc) >= 2) && 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (uint8_t)(pSrc[1] - 0x80) <= 0x3f 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength++; 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 2; 693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* function call for "complicated" and error cases */ 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; /* continue after the lead byte */ 699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch=utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch); 700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch<0 && (++numSubstitutions, ch = subchar) < 0){ 701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_INVALID_CHAR_FOUND; 702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 704103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius reqLength+=U16_LENGTH(ch); 705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength+=(int32_t)(pDest - dest); 710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pNumSubstitutions!=NULL) { 712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pNumSubstitutions=numSubstitutions; 713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pDestLength){ 716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDestLength = reqLength; 717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Terminate the buffer */ 720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_terminateUChars(dest,destCapacity,reqLength,pErrorCode); 721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return dest; 723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar* U_EXPORT2 726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strFromUTF8(UChar *dest, 727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destCapacity, 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t *pDestLength, 729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* src, 730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcLength, 731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode){ 732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return u_strFromUTF8WithSub( 733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest, destCapacity, pDestLength, 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, srcLength, 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_SENTINEL, NULL, 736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar * U_EXPORT2 740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strFromUTF8Lenient(UChar *dest, 741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destCapacity, 742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t *pDestLength, 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *src, 744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcLength, 745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *pDest = dest; 747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 ch; 748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t reqLength = 0; 749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t* pSrc = (uint8_t*) src; 750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* args check */ 752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ 753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 75650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( (src==NULL && srcLength!=0) || srcLength < -1 || 75750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (destCapacity<0) || (dest == NULL && destCapacity > 0) 75850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(srcLength < 0) { 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Transform a NUL-terminated string. */ 765103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UChar *pDestLimit = (dest!=NULL)?(dest+destCapacity):NULL; 766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t t1, t2, t3; /* trail bytes */ 767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(((ch = *pSrc) != 0) && (pDest < pDestLimit)) { 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch < 0xc0) { 770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ASCII, or a trail byte in lead position which is treated like 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a single-byte sequence for better character boundary 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * resynchronization after illegal sequences. 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(UChar)ch; 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xe0) { /* U+0080..U+07FF */ 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((t1 = pSrc[1]) != 0) { 780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 0x3080 = (0xc0 << 6) + 0x80 */ 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = (UChar)((ch << 6) + t1 - 0x3080); 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 2; 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xf0) { /* U+0800..U+FFFF */ 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0) { 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 0x2080 = (0x80 << 6) + 0x80 */ 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = (UChar)((ch << 12) + (t1 << 6) + t2 - 0x2080); 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 3; 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* f0..f4 */ { /* U+10000..U+10FFFF */ 794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0 && (t3 = pSrc[3]) != 0) { 795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 4; 796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ 797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = (ch << 18) + (t1 << 12) + (t2 << 6) + t3 - 0x3c82080; 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(pDest++) = U16_LEAD(ch); 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pDest < pDestLimit) { 800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(pDest++) = U16_TRAIL(ch); 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength = 1; 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* truncated character at the end */ 810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = 0xfffd; 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(*++pSrc != 0) {} 812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Pre-flight the rest of the string. */ 816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((ch = *pSrc) != 0) { 817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch < 0xc0) { 818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ASCII, or a trail byte in lead position which is treated like 820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a single-byte sequence for better character boundary 821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * resynchronization after illegal sequences. 822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++reqLength; 824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xe0) { /* U+0080..U+07FF */ 827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pSrc[1] != 0) { 828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++reqLength; 829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 2; 830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xf0) { /* U+0800..U+FFFF */ 833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pSrc[1] != 0 && pSrc[2] != 0) { 834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++reqLength; 835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 3; 836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* f0..f4 */ { /* U+10000..U+10FFFF */ 839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pSrc[1] != 0 && pSrc[2] != 0 && pSrc[3] != 0) { 840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength += 2; 841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 4; 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* truncated character at the end */ 847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++reqLength; 848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* srcLength >= 0 */ { 851103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const uint8_t *pSrcLimit = (pSrc!=NULL)?(pSrc + srcLength):NULL; 852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This function requires that if srcLength is given, then it must be 855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * destCapatity >= srcLength so that we need not check for 856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * destination buffer overflow in the loop. 857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(destCapacity < srcLength) { 859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pDestLength != NULL) { 860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDestLength = srcLength; /* this likely overestimates the true destLength! */ 861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_BUFFER_OVERFLOW_ERROR; 863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((pSrcLimit - pSrc) >= 4) { 867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrcLimit -= 3; /* temporarily reduce pSrcLimit */ 868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* in this loop, we can always access at least 4 bytes, up to pSrc+3 */ 870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = *pSrc++; 872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch < 0xc0) { 873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ASCII, or a trail byte in lead position which is treated like 875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a single-byte sequence for better character boundary 876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * resynchronization after illegal sequences. 877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(UChar)ch; 879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xe0) { /* U+0080..U+07FF */ 880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 0x3080 = (0xc0 << 6) + 0x80 */ 881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080); 882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xf0) { /* U+0800..U+FFFF */ 883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 0x2080 = (0x80 << 6) + 0x80 */ 885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = (ch << 12) + (*pSrc++ << 6); 886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = (UChar)(ch + *pSrc++ - 0x2080); 887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* f0..f4 */ { /* U+10000..U+10FFFF */ 888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ 889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = (ch << 18) + (*pSrc++ << 12); 890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch += *pSrc++ << 6; 891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch += *pSrc++ - 0x3c82080; 892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(pDest++) = U16_LEAD(ch); 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(pDest++) = U16_TRAIL(ch); 894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while(pSrc < pSrcLimit); 896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrcLimit += 3; /* restore original pSrcLimit */ 898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(pSrc < pSrcLimit) { 901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = *pSrc++; 902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch < 0xc0) { 903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ASCII, or a trail byte in lead position which is treated like 905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a single-byte sequence for better character boundary 906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * resynchronization after illegal sequences. 907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(UChar)ch; 909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xe0) { /* U+0080..U+07FF */ 911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pSrc < pSrcLimit) { 912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 0x3080 = (0xc0 << 6) + 0x80 */ 913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080); 914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xf0) { /* U+0800..U+FFFF */ 917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((pSrcLimit - pSrc) >= 2) { 918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 0x2080 = (0x80 << 6) + 0x80 */ 920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = (ch << 12) + (*pSrc++ << 6); 921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = (UChar)(ch + *pSrc++ - 0x2080); 922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 3; 923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* f0..f4 */ { /* U+10000..U+10FFFF */ 926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((pSrcLimit - pSrc) >= 3) { 927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ 928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = (ch << 18) + (*pSrc++ << 12); 929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch += *pSrc++ << 6; 930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch += *pSrc++ - 0x3c82080; 931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(pDest++) = U16_LEAD(ch); 932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(pDest++) = U16_TRAIL(ch); 933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 4; 934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* truncated character at the end */ 939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = 0xfffd; 940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength+=(int32_t)(pDest - dest); 945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pDestLength){ 947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDestLength = reqLength; 948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Terminate the buffer */ 951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_terminateUChars(dest,destCapacity,reqLength,pErrorCode); 952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return dest; 954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 956103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusstatic inline uint8_t * 957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru_appendUTF8(uint8_t *pDest, UChar32 c) { 958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* it is 0<=c<=0x10ffff and not a surrogate if called by a validating function */ 959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((c)<=0x7f) { 960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)c; 961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(c<=0x7ff) { 962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((c>>6)|0xc0); 963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((c&0x3f)|0x80); 964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(c<=0xffff) { 965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((c>>12)|0xe0); 966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)(((c>>6)&0x3f)|0x80); 967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)(((c)&0x3f)|0x80); 968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* if((uint32_t)(c)<=0x10ffff) */ { 969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)(((c)>>18)|0xf0); 970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((((c)>>12)&0x3f)|0x80); 971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((((c)>>6)&0x3f)|0x80); 972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)(((c)&0x3f)|0x80); 973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return pDest; 975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI char* U_EXPORT2 979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strToUTF8WithSub(char *dest, 980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destCapacity, 981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t *pDestLength, 982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *pSrc, 983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcLength, 984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 subchar, int32_t *pNumSubstitutions, 985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode){ 986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t reqLength=0; 987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t ch=0,ch2=0; 988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *pDest = (uint8_t *)dest; 989103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius uint8_t *pDestLimit = (pDest!=NULL)?(pDest + destCapacity):NULL; 990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t numSubstitutions; 991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* args check */ 993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ 994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 99750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( (pSrc==NULL && srcLength!=0) || srcLength < -1 || 99850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (destCapacity<0) || (dest == NULL && destCapacity > 0) || 999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru subchar > 0x10ffff || U_IS_SURROGATE(subchar) 1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1005b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(pNumSubstitutions!=NULL) { 1006b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pNumSubstitutions=0; 1007b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru numSubstitutions=0; 1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(srcLength==-1) { 1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((ch=*pSrc)!=0) { 1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch <= 0x7f) { 1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pDest<pDestLimit) { 101550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++ = (uint8_t)ch; 1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength = 1; 1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch <= 0x7ff) { 1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((pDestLimit - pDest) >= 2) { 1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch>>6)|0xc0); 1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch&0x3f)|0x80); 1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength = 2; 1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch <= 0xd7ff || ch >= 0xe000) { 1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((pDestLimit - pDest) >= 3) { 1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch>>12)|0xe0); 1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); 1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch&0x3f)|0x80); 1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength = 3; 1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* ch is a surrogate */ { 1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length; 1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1040103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius /*need not check for NUL because NUL fails U16_IS_TRAIL() anyway*/ 1041103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) { 1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 1043103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius ch=U16_GET_SUPPLEMENTARY(ch, ch2); 1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(subchar>=0) { 1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch=subchar; 1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++numSubstitutions; 1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Unicode 3.2 forbids surrogate code points in UTF-8 */ 1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_INVALID_CHAR_FOUND; 1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length = U8_LENGTH(ch); 1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((pDestLimit - pDest) >= length) { 1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* convert and append*/ 1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pDest=_appendUTF8(pDest, ch); 1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength = length; 1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((ch=*pSrc++)!=0) { 1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch<=0x7f) { 1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++reqLength; 1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch<=0x7ff) { 1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength+=2; 1068103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else if(!U16_IS_SURROGATE(ch)) { 1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength+=3; 1070103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) { 1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength+=4; 1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(subchar>=0) { 1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength+=U8_LENGTH(subchar); 1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++numSubstitutions; 1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Unicode 3.2 forbids surrogate code points in UTF-8 */ 1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_INVALID_CHAR_FOUND; 1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1083103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const UChar *pSrcLimit = (pSrc!=NULL)?(pSrc+srcLength):NULL; 1084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t count; 1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */ 1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Each iteration of the inner loop progresses by at most 3 UTF-8 1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * bytes and one UChar, for most characters. 1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For supplementary code points (4 & 2), which are rare, 1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * there is an additional adjustment. 1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count = (int32_t)((pDestLimit - pDest) / 3); 1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcLength = (int32_t)(pSrcLimit - pSrc); 1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count > srcLength) { 1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count = srcLength; /* min(remaining dest/3, remaining src) */ 1098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count < 3) { 1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 1101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Too much overhead if we get near the end of the string, 1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * continue with the next loop. 1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch=*pSrc++; 1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch <= 0x7f) { 110950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++ = (uint8_t)ch; 1110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch <= 0x7ff) { 1111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch>>6)|0xc0); 1112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch&0x3f)|0x80); 1113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch <= 0xd7ff || ch >= 0xe000) { 1114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch>>12)|0xe0); 1115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); 1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch&0x3f)|0x80); 1117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* ch is a surrogate */ { 1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 1119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * We will read two UChars and probably output four bytes, 1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * which we didn't account for with computing count, 1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * so we adjust it here. 1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(--count == 0) { 1124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --pSrc; /* undo ch=*pSrc++ for the lead surrogate */ 1125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; /* recompute count */ 1126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1128103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) { 1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 1130103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius ch=U16_GET_SUPPLEMENTARY(ch, ch2); 1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* writing 4 bytes per 2 UChars is ok */ 1133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch>>18)|0xf0); 1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)(((ch>>12)&0x3f)|0x80); 1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); 1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch&0x3f)|0x80); 1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Unicode 3.2 forbids surrogate code points in UTF-8 */ 1139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(subchar>=0) { 1140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch=subchar; 1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++numSubstitutions; 1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_INVALID_CHAR_FOUND; 1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* convert and append*/ 1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pDest=_appendUTF8(pDest, ch); 1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while(--count > 0); 1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(pSrc<pSrcLimit) { 1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch=*pSrc++; 1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch <= 0x7f) { 1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pDest<pDestLimit) { 115850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++ = (uint8_t)ch; 1159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength = 1; 1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch <= 0x7ff) { 1164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((pDestLimit - pDest) >= 2) { 1165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch>>6)|0xc0); 1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch&0x3f)|0x80); 1167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength = 2; 1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch <= 0xd7ff || ch >= 0xe000) { 1172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((pDestLimit - pDest) >= 3) { 1173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch>>12)|0xe0); 1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); 1175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch&0x3f)|0x80); 1176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength = 3; 1178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* ch is a surrogate */ { 1181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length; 1182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1183103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if(U16_IS_SURROGATE_LEAD(ch) && pSrc<pSrcLimit && U16_IS_TRAIL(ch2=*pSrc)) { 1184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 1185103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius ch=U16_GET_SUPPLEMENTARY(ch, ch2); 1186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(subchar>=0) { 1187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch=subchar; 1188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++numSubstitutions; 1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Unicode 3.2 forbids surrogate code points in UTF-8 */ 1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_INVALID_CHAR_FOUND; 1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length = U8_LENGTH(ch); 1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((pDestLimit - pDest) >= length) { 1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* convert and append*/ 1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pDest=_appendUTF8(pDest, ch); 1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength = length; 1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(pSrc<pSrcLimit) { 1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch=*pSrc++; 1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch<=0x7f) { 1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++reqLength; 1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch<=0x7ff) { 1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength+=2; 1211103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else if(!U16_IS_SURROGATE(ch)) { 1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength+=3; 1213103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius } else if(U16_IS_SURROGATE_LEAD(ch) && pSrc<pSrcLimit && U16_IS_TRAIL(ch2=*pSrc)) { 1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 1215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength+=4; 1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(subchar>=0) { 1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength+=U8_LENGTH(subchar); 1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++numSubstitutions; 1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Unicode 3.2 forbids surrogate code points in UTF-8 */ 1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_INVALID_CHAR_FOUND; 1222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 1223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength+=(int32_t)(pDest - (uint8_t *)dest); 1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pNumSubstitutions!=NULL) { 1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pNumSubstitutions=numSubstitutions; 1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pDestLength){ 1234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDestLength = reqLength; 1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Terminate the buffer */ 123850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_terminateChars(dest, destCapacity, reqLength, pErrorCode); 123950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return dest; 1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI char* U_EXPORT2 1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strToUTF8(char *dest, 1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destCapacity, 1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t *pDestLength, 1246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *pSrc, 1247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcLength, 1248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode){ 1249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return u_strToUTF8WithSub( 1250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest, destCapacity, pDestLength, 1251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc, srcLength, 1252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_SENTINEL, NULL, 1253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 1254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 125550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 125650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI UChar* U_EXPORT2 125750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehou_strFromJavaModifiedUTF8WithSub( 125850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *dest, 125950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t destCapacity, 126050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t *pDestLength, 126150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const char *src, 126250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t srcLength, 126350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 subchar, int32_t *pNumSubstitutions, 126450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *pErrorCode) { 126550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *pDest = dest; 126650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *pDestLimit = dest+destCapacity; 126750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 ch; 126850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t reqLength = 0; 126950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const uint8_t* pSrc = (const uint8_t*) src; 127050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const uint8_t *pSrcLimit; 127150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t count; 127250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint8_t t1, t2; /* trail bytes */ 127350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t numSubstitutions; 127450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 127550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* args check */ 127650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*pErrorCode)){ 127750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 127850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 127950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( (src==NULL && srcLength!=0) || srcLength < -1 || 128050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (dest==NULL && destCapacity!=0) || destCapacity<0 || 128150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho subchar > 0x10ffff || U_IS_SURROGATE(subchar) 128250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 128350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 128450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 128550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 128650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 128750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(pNumSubstitutions!=NULL) { 128850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pNumSubstitutions=0; 128950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 129050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho numSubstitutions=0; 129150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 129250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(srcLength < 0) { 129350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 129450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Transform a NUL-terminated ASCII string. 129550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Handle non-ASCII strings with slower code. 129650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 129750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(((ch = *pSrc) != 0) && ch <= 0x7f && (pDest < pDestLimit)) { 129850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(UChar)ch; 129950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++pSrc; 130050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 130150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(ch == 0) { 130250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength=(int32_t)(pDest - dest); 130350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(pDestLength) { 130450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDestLength = reqLength; 130550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 130650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 130750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Terminate the buffer */ 130850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_terminateUChars(dest, destCapacity, reqLength, pErrorCode); 130950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return dest; 131050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 131150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcLength = uprv_strlen((const char *)pSrc); 131250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 131350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 131450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */ 131554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius pSrcLimit = (pSrc == NULL) ? NULL : pSrc + srcLength; 131650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(;;) { 131750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho count = (int32_t)(pDestLimit - pDest); 131850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcLength = (int32_t)(pSrcLimit - pSrc); 131950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(count >= srcLength && srcLength > 0 && *pSrc <= 0x7f) { 132050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* fast ASCII loop */ 132150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const uint8_t *prevSrc = pSrc; 132250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t delta; 132350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(pSrc < pSrcLimit && (ch = *pSrc) <= 0x7f) { 132450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(UChar)ch; 132550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++pSrc; 132650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 132750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delta = (int32_t)(pSrc - prevSrc); 132850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho count -= delta; 132950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcLength -= delta; 133050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 133150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 133250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Each iteration of the inner loop progresses by at most 3 UTF-8 133350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * bytes and one UChar. 133450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 133550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcLength /= 3; 133650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(count > srcLength) { 133750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho count = srcLength; /* min(remaining dest, remaining src/3) */ 133850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 133950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(count < 3) { 134050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 134150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Too much overhead if we get near the end of the string, 134250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * continue with the next loop. 134350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 134450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 134550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 134650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho do { 134750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ch = *pSrc; 134850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(ch <= 0x7f){ 134950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(UChar)ch; 135050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++pSrc; 135150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 135250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(ch >= 0xe0) { 135350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( /* handle U+0000..U+FFFF inline */ 135450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ch <= 0xef && 135550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f && 135650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f 135750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 135850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 135950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2); 136050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pSrc += 3; 136150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 136250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 136350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 136450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( /* handle U+0000..U+07FF inline */ 136550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ch >= 0xc0 && 136650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f 136750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 136850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1); 136950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pSrc += 2; 137050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 137150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 137250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 137350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 137450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(subchar < 0) { 137550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pErrorCode = U_INVALID_CHAR_FOUND; 137650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 137750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(subchar > 0xffff && --count == 0) { 137850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 137950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * We need to write two UChars, adjusted count for that, 138050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and ran out of space. 138150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 138250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 138350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 138450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* function call for error cases */ 138550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++pSrc; /* continue after the lead byte */ 138650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch); 138750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++numSubstitutions; 138850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(subchar<=0xFFFF) { 138950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *(pDest++)=(UChar)subchar; 139050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 139150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *(pDest++)=U16_LEAD(subchar); 139250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *(pDest++)=U16_TRAIL(subchar); 139350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 139450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 139550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 139650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } while(--count > 0); 139750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 139850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 139950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while((pSrc<pSrcLimit) && (pDest<pDestLimit)) { 140050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ch = *pSrc; 140150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(ch <= 0x7f){ 140250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(UChar)ch; 140350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++pSrc; 140450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 140550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(ch >= 0xe0) { 140650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( /* handle U+0000..U+FFFF inline */ 140750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ch <= 0xef && 140850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ((pSrcLimit - pSrc) >= 3) && 140950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f && 141050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f 141150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 141250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 141350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2); 141450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pSrc += 3; 141550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 141650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 141750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 141850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( /* handle U+0000..U+07FF inline */ 141950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ch >= 0xc0 && 142050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ((pSrcLimit - pSrc) >= 2) && 142150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f 142250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 142350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1); 142450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pSrc += 2; 142550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 142650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 142750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 142850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 142950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(subchar < 0) { 143050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pErrorCode = U_INVALID_CHAR_FOUND; 143150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 143250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 143350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* function call for error cases */ 143450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++pSrc; /* continue after the lead byte */ 143550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch); 143650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++numSubstitutions; 143750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(subchar<=0xFFFF) { 143850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *(pDest++)=(UChar)subchar; 143950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 144050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *(pDest++)=U16_LEAD(subchar); 144150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(pDest<pDestLimit) { 144250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *(pDest++)=U16_TRAIL(subchar); 144350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 144450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength++; 144550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 144650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 144750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 144850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 144950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 145050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 145150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 145250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* do not fill the dest buffer just count the UChars needed */ 145350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(pSrc < pSrcLimit){ 145450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ch = *pSrc; 145550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(ch <= 0x7f) { 145650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength++; 145750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++pSrc; 145850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 145950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(ch >= 0xe0) { 146050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( /* handle U+0000..U+FFFF inline */ 146150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ch <= 0xef && 146250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ((pSrcLimit - pSrc) >= 3) && 146350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (uint8_t)(pSrc[1] - 0x80) <= 0x3f && 146450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (uint8_t)(pSrc[2] - 0x80) <= 0x3f 146550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 146650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength++; 146750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pSrc += 3; 146850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 146950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 147050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 147150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( /* handle U+0000..U+07FF inline */ 147250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ch >= 0xc0 && 147350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ((pSrcLimit - pSrc) >= 2) && 147450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (uint8_t)(pSrc[1] - 0x80) <= 0x3f 147550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 147650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength++; 147750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pSrc += 2; 147850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 147950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 148050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 148150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 148250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(subchar < 0) { 148350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pErrorCode = U_INVALID_CHAR_FOUND; 148450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 148550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 148650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* function call for error cases */ 148750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++pSrc; /* continue after the lead byte */ 148850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch); 148950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++numSubstitutions; 149050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength+=U16_LENGTH(ch); 149150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 149250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 149350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 149450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 149550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(pNumSubstitutions!=NULL) { 149650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pNumSubstitutions=numSubstitutions; 149750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 149850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 149950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength+=(int32_t)(pDest - dest); 150050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(pDestLength) { 150150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDestLength = reqLength; 150250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 150350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 150450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Terminate the buffer */ 150550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_terminateUChars(dest, destCapacity, reqLength, pErrorCode); 150650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return dest; 150750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 150850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 150950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI char* U_EXPORT2 151050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehou_strToJavaModifiedUTF8( 151150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho char *dest, 151250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t destCapacity, 151350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t *pDestLength, 151450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *src, 151550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t srcLength, 151650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *pErrorCode) { 151750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t reqLength=0; 151827f654740f2a26ad62a5c155af9199af9e69b889claireho uint32_t ch=0; 151950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint8_t *pDest = (uint8_t *)dest; 152050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint8_t *pDestLimit = pDest + destCapacity; 152150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *pSrcLimit; 152250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t count; 152350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 152450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* args check */ 152550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*pErrorCode)){ 152650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 152750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 152850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( (src==NULL && srcLength!=0) || srcLength < -1 || 152950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (dest==NULL && destCapacity!=0) || destCapacity<0 153050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 153150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 153250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 153350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 153450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 153550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(srcLength==-1) { 153650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Convert NUL-terminated ASCII, then find the string length. */ 153750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while((ch=*src)<=0x7f && ch != 0 && pDest<pDestLimit) { 153850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++ = (uint8_t)ch; 153950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++src; 154050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 154150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(ch == 0) { 154250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength=(int32_t)(pDest - (uint8_t *)dest); 154350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(pDestLength) { 154450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDestLength = reqLength; 154550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 154650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 154750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Terminate the buffer */ 154850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_terminateChars(dest, destCapacity, reqLength, pErrorCode); 154950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return dest; 155050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 155150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcLength = u_strlen(src); 155250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 155350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 155450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */ 1555103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius pSrcLimit = (src!=NULL)?(src+srcLength):NULL; 155650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(;;) { 155750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho count = (int32_t)(pDestLimit - pDest); 155850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcLength = (int32_t)(pSrcLimit - src); 155950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(count >= srcLength && srcLength > 0 && *src <= 0x7f) { 156050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* fast ASCII loop */ 156150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *prevSrc = src; 156250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t delta; 156350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(src < pSrcLimit && (ch = *src) <= 0x7f && ch != 0) { 156450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(uint8_t)ch; 156550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++src; 156650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 156750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delta = (int32_t)(src - prevSrc); 156850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho count -= delta; 156950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcLength -= delta; 157050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 157150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 157250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Each iteration of the inner loop progresses by at most 3 UTF-8 157350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * bytes and one UChar. 157450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 157550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho count /= 3; 157650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(count > srcLength) { 157750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho count = srcLength; /* min(remaining dest/3, remaining src) */ 157850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 157950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(count < 3) { 158050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 158150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Too much overhead if we get near the end of the string, 158250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * continue with the next loop. 158350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 158450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 158550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 158650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho do { 158750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ch=*src++; 158850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(ch <= 0x7f && ch != 0) { 158950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++ = (uint8_t)ch; 159050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(ch <= 0x7ff) { 159150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(uint8_t)((ch>>6)|0xc0); 159250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(uint8_t)((ch&0x3f)|0x80); 159350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 159450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(uint8_t)((ch>>12)|0xe0); 159550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); 159650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(uint8_t)((ch&0x3f)|0x80); 159750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 159850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } while(--count > 0); 159950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 160050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 160150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(src<pSrcLimit) { 160250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ch=*src++; 160350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(ch <= 0x7f && ch != 0) { 160450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(pDest<pDestLimit) { 160550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++ = (uint8_t)ch; 160650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 160750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength = 1; 160850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 160950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 161050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(ch <= 0x7ff) { 161150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if((pDestLimit - pDest) >= 2) { 161250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(uint8_t)((ch>>6)|0xc0); 161350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(uint8_t)((ch&0x3f)|0x80); 161450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 161550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength = 2; 161650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 161750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 161850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 161950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if((pDestLimit - pDest) >= 3) { 162050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(uint8_t)((ch>>12)|0xe0); 162150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); 162250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(uint8_t)((ch&0x3f)|0x80); 162350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 162450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength = 3; 162550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 162650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 162750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 162850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 162950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(src<pSrcLimit) { 163050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ch=*src++; 163150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(ch <= 0x7f && ch != 0) { 163250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++reqLength; 163350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(ch<=0x7ff) { 163450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength+=2; 163550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 163650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength+=3; 163750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 163850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 163950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 164050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength+=(int32_t)(pDest - (uint8_t *)dest); 164150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(pDestLength){ 164250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDestLength = reqLength; 164350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 164450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 164550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Terminate the buffer */ 164650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_terminateChars(dest, destCapacity, reqLength, pErrorCode); 164750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return dest; 164850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 1649