1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru****************************************************************************** 3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* Copyright (C) 2001-2010, International Business Machines 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru****************************************************************************** 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* File ustrtrns.c 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Modification History: 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Date Name Description 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 9/10/2001 Ram Creation. 
15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru****************************************************************************** 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/******************************************************************************* 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * u_strTo* and u_strFrom* APIs 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * WCS functions moved to ustr_wcs.c for better modularization 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ******************************************************************************* 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/putil.h" 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h" 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h" 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ustr_imp.h" 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar* U_EXPORT2 34b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruu_strFromUTF32WithSub(UChar *dest, 35b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t destCapacity, 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t *pDestLength, 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar32 
*src, 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcLength, 39b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 subchar, int32_t *pNumSubstitutions, 40b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode *pErrorCode) { 41b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UChar32 *srcLimit; 42b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 ch; 43b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar *destLimit; 44b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar *pDest; 45b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t reqLength; 46b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t numSubstitutions; 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* args check */ 49b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(U_FAILURE(*pErrorCode)){ 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 5250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( (src==NULL && srcLength!=0) || srcLength < -1 || 5350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (destCapacity<0) || (dest == NULL && destCapacity > 0) || 54b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru subchar > 0x10ffff || U_IS_SURROGATE(subchar) 55b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ) { 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 60b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(pNumSubstitutions != NULL) { 61b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 
*pNumSubstitutions = 0; 62b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 63b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 64b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pDest = dest; 65b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru destLimit = dest + destCapacity; 66b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru reqLength = 0; 67b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru numSubstitutions = 0; 68b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 69b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(srcLength < 0) { 70b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* simple loop for conversion of a NUL-terminated BMP string */ 71b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru while((ch=*src) != 0 && 72b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff))) { 73b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ++src; 74b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(pDest < destLimit) { 75b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pDest++ = (UChar)ch; 76b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 77b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ++reqLength; 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 80b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru srcLimit = src; 81b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(ch != 0) { 82b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* "complicated" case, find the end of the remaining string */ 83b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru while(*++srcLimit != 0) {} 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 85b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 
86b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru srcLimit = src + srcLength; 87b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 88b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 89b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* convert with length */ 90b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru while(src < srcLimit) { 91b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ch = *src++; 92b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru do { 93b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* usually "loops" once; twice only for writing subchar */ 94b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff)) { 95b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(pDest < destLimit) { 96b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pDest++ = (UChar)ch; 97b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 98b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ++reqLength; 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 100b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 101b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else if(0x10000 <= ch && ch <= 0x10ffff) { 102b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if((pDest + 2) <= destLimit) { 103b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pDest++ = U16_LEAD(ch); 104b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pDest++ = U16_TRAIL(ch); 105b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 106b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru reqLength += 2; 107b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 108b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru break; 109b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else if((ch = subchar) < 0) { 
110b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* surrogate code point, or not a Unicode code point at all */ 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_INVALID_CHAR_FOUND; 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 113b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 114b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ++numSubstitutions; 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 116b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } while(TRUE); 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength += (int32_t)(pDest - dest); 120b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(pDestLength) { 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDestLength = reqLength; 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 123b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(pNumSubstitutions != NULL) { 124b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pNumSubstitutions = numSubstitutions; 125b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Terminate the buffer */ 128b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru u_terminateUChars(dest, destCapacity, reqLength, pErrorCode); 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return dest; 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 133b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI UChar* U_EXPORT2 
134b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruu_strFromUTF32(UChar *dest, 135b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t destCapacity, 136b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t *pDestLength, 137b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UChar32 *src, 138b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t srcLength, 139b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode *pErrorCode) { 140b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return u_strFromUTF32WithSub( 141b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru dest, destCapacity, pDestLength, 142b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru src, srcLength, 143b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U_SENTINEL, NULL, 144b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pErrorCode); 145b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar32* U_EXPORT2 148b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruu_strToUTF32WithSub(UChar32 *dest, 149b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t destCapacity, 150b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t *pDestLength, 151b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UChar *src, 152b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t srcLength, 153b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 subchar, int32_t *pNumSubstitutions, 154b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode *pErrorCode) { 155b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UChar *srcLimit; 156b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 ch; 157b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste 
Queru UChar ch2; 158b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 *destLimit; 159b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 *pDest; 160b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t reqLength; 161b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t numSubstitutions; 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* args check */ 164b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(U_FAILURE(*pErrorCode)){ 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 16750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( (src==NULL && srcLength!=0) || srcLength < -1 || 16850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (destCapacity<0) || (dest == NULL && destCapacity > 0) || 169b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru subchar > 0x10ffff || U_IS_SURROGATE(subchar) 170b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ) { 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 175b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(pNumSubstitutions != NULL) { 176b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pNumSubstitutions = 0; 177b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 178b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 179b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pDest = dest; 180b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru destLimit = dest + destCapacity; 181b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru reqLength = 0; 
182b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru numSubstitutions = 0; 183b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 184b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(srcLength < 0) { 185b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* simple loop for conversion of a NUL-terminated BMP string */ 186b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru while((ch=*src) != 0 && !U16_IS_SURROGATE(ch)) { 187b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ++src; 188b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(pDest < destLimit) { 189b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pDest++ = ch; 190b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 191b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ++reqLength; 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 194b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru srcLimit = src; 195b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(ch != 0) { 196b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* "complicated" case, find the end of the remaining string */ 197b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru while(*++srcLimit != 0) {} 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 200b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru srcLimit = src + srcLength; 201b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 202b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 203b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* convert with length */ 204b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru while(src < srcLimit) { 205b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ch = *src++; 
206b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(!U16_IS_SURROGATE(ch)) { 207b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* write or count ch below */ 208b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else if(U16_IS_SURROGATE_LEAD(ch) && src < srcLimit && U16_IS_TRAIL(ch2 = *src)) { 209b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ++src; 210b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ch = U16_GET_SUPPLEMENTARY(ch, ch2); 211b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else if((ch = subchar) < 0) { 212b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru /* unpaired surrogate */ 213b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pErrorCode = U_INVALID_CHAR_FOUND; 214b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return NULL; 215b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 216b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru ++numSubstitutions; 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 218b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(pDest < destLimit) { 219b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pDest++ = ch; 220b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } else { 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++reqLength; 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 225b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru reqLength += (int32_t)(pDest - dest); 226b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(pDestLength) { 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDestLength = reqLength; 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 229b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 
if(pNumSubstitutions != NULL) { 230b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pNumSubstitutions = numSubstitutions; 231b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Terminate the buffer */ 234b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru u_terminateUChar32s(dest, destCapacity, reqLength, pErrorCode); 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return dest; 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 239b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste QueruU_CAPI UChar32* U_EXPORT2 240b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queruu_strToUTF32(UChar32 *dest, 241b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t destCapacity, 242b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t *pDestLength, 243b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru const UChar *src, 244b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t srcLength, 245b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UErrorCode *pErrorCode) { 246b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru return u_strToUTF32WithSub( 247b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru dest, destCapacity, pDestLength, 248b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru src, srcLength, 249b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U_SENTINEL, NULL, 250b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru pErrorCode); 251b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru} 252b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* for utf8_nextCharSafeBodyTerminated() 
*/ 254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar32 255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruutf8_minLegal[4]={ 0, 0x80, 0x800, 0x10000 }; 256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Version of utf8_nextCharSafeBody() with the following differences: 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - checks for NUL termination instead of length 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - works with pointers instead of indexes 261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - always strict (strict==-1) 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * *ps points to after the lead byte and will be moved to after the last trail byte. 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * c is the lead byte. 
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the code point, or U_SENTINEL 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UChar32 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruutf8_nextCharSafeBodyTerminated(const uint8_t **ps, UChar32 c) { 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *s=*ps; 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t trail, illegal=0; 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t count=UTF8_COUNT_TRAIL_BYTES(c); 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTF8_MASK_LEAD_BYTE((c), count); 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */ 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(count) { 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* each branch falls through to the next one */ 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 5: 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 4: 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* count>=4 is always illegal: no more than 3 trail bytes in Unicode's UTF-8 */ 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru illegal=1; 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 3: 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru trail=(uint8_t)(*s++ - 0x80); 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=(c<<6)|trail; 284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(trail>0x3f || c>=0x110) { 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* not a trail byte, or code point>0x10ffff (outside Unicode) */ 
286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru illegal=1; 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 2: 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru trail=(uint8_t)(*s++ - 0x80); 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(trail>0x3f) { 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* not a trail byte */ 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru illegal=1; 294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=(c<<6)|trail; 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 1: 298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru trail=(uint8_t)(*s++ - 0x80); 299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(trail>0x3f) { 300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* not a trail byte */ 301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru illegal=1; 302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=(c<<6)|trail; 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0: 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return U_SENTINEL; 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* no default branch to optimize switch() - all values are covered */ 308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* correct sequence - all trail bytes have (b7..b6)==(10)? 
*/ 311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* illegal is also set if count>=4 */ 312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(illegal || c<utf8_minLegal[count] || UTF_IS_SURROGATE(c)) { 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* error handling */ 314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* don't go beyond this sequence */ 315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s=*ps; 316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(count>0 && UTF8_IS_TRAIL(*s)) { 317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++s; 318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --count; 319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=U_SENTINEL; 321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *ps=s; 323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return c; 324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Version of utf8_nextCharSafeBody() with the following differences: 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - works with pointers instead of indexes 329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - always strict (strict==-1) 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * *ps points to after the lead byte and will be moved to after the last trail byte. 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * c is the lead byte. 
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the code point, or U_SENTINEL 334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UChar32 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruutf8_nextCharSafeBodyPointer(const uint8_t **ps, const uint8_t *limit, UChar32 c) { 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *s=*ps; 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t trail, illegal=0; 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t count=UTF8_COUNT_TRAIL_BYTES(c); 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((limit-s)>=count) { 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTF8_MASK_LEAD_BYTE((c), count); 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* count==0 for illegally leading trail bytes and the illegal bytes 0xfe and 0xff */ 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(count) { 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* each branch falls through to the next one */ 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 5: 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 4: 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* count>=4 is always illegal: no more than 3 trail bytes in Unicode's UTF-8 */ 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru illegal=1; 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 3: 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru trail=*s++; 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=(c<<6)|(trail&0x3f); 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c<0x110) { 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
illegal|=(trail&0xc0)^0x80; 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* code point>0x10ffff, outside Unicode */ 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru illegal=1; 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 2: 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru trail=*s++; 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=(c<<6)|(trail&0x3f); 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru illegal|=(trail&0xc0)^0x80; 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 1: 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru trail=*s++; 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=(c<<6)|(trail&0x3f); 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru illegal|=(trail&0xc0)^0x80; 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0: 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return U_SENTINEL; 371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* no default branch to optimize switch() - all values are covered */ 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru illegal=1; /* too few bytes left */ 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* correct sequence - all trail bytes have (b7..b6)==(10)? 
*/ 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* illegal is also set if count>=4 */ 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(illegal || c<utf8_minLegal[count] || UTF_IS_SURROGATE(c)) { 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* error handling */ 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* don't go beyond this sequence */ 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s=*ps; 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(count>0 && s<limit && UTF8_IS_TRAIL(*s)) { 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++s; 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --count; 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=U_SENTINEL; 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *ps=s; 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return c; 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar* U_EXPORT2 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strFromUTF8WithSub(UChar *dest, 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destCapacity, 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t *pDestLength, 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* src, 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcLength, 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 subchar, int32_t *pNumSubstitutions, 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode){ 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *pDest = 
dest; 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *pDestLimit = dest+destCapacity; 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 ch; 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t reqLength = 0; 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t* pSrc = (const uint8_t*) src; 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t t1, t2; /* trail bytes */ 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t numSubstitutions; 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* args check */ 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 41450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( (src==NULL && srcLength!=0) || srcLength < -1 || 41550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (destCapacity<0) || (dest == NULL && destCapacity > 0) || 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru subchar > 0x10ffff || U_IS_SURROGATE(subchar) 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 422b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(pNumSubstitutions!=NULL) { 423b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pNumSubstitutions=0; 424b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 
425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru numSubstitutions=0; 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Inline processing of UTF-8 byte sequences: 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Byte sequences for the most common characters are handled inline in 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the conversion loops. In order to reduce the path lengths for those 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * characters, the tests are arranged in a kind of binary search. 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ASCII (<=0x7f) is checked first, followed by the dividing point 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * between 2- and 3-byte sequences (0xe0). 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The 3-byte branch is tested first to speed up CJK text. 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The compiler should combine the subtractions for the two tests for 0xe0. 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Each branch then tests for the other end of its range. 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(srcLength < 0){ 441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Transform a NUL-terminated string. 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The code explicitly checks for NULs only in the lead byte position. 
444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * A NUL byte in the trail byte position fails the trail byte range check anyway. 445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(((ch = *pSrc) != 0) && (pDest < pDestLimit)) { 447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch <= 0x7f){ 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(UChar)ch; 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch > 0xe0) { 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( /* handle U+1000..U+CFFF inline */ 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch <= 0xec && 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f && 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2); 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 3; 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xe0) { 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( /* handle U+0080..U+07FF inline */ 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch >= 0xc2 && 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (t1 = 
(uint8_t)(pSrc[1] - 0x80)) <= 0x3f 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1); 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 2; 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* function call for "complicated" and error cases */ 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; /* continue after the lead byte */ 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch=utf8_nextCharSafeBodyTerminated(&pSrc, ch); 476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch<0 && (++numSubstitutions, ch = subchar) < 0) { 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_INVALID_CHAR_FOUND; 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch<=0xFFFF) { 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(pDest++)=(UChar)ch; 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(pDest++)=UTF16_LEAD(ch); 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pDest<pDestLimit) { 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(pDest++)=UTF16_TRAIL(ch); 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength++; 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Pre-flight the rest of the string. */ 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((ch = *pSrc) != 0) { 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch <= 0x7f){ 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++reqLength; 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch > 0xe0) { 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( /* handle U+1000..U+CFFF inline */ 501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch <= 0xec && 502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (uint8_t)(pSrc[1] - 0x80) <= 0x3f && 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (uint8_t)(pSrc[2] - 0x80) <= 0x3f 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++reqLength; 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 3; 507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xe0) { 510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( /* handle U+0080..U+07FF inline */ 511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch >= 0xc2 && 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (uint8_t)(pSrc[1] - 0x80) <= 0x3f 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 
514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++reqLength; 515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 2; 516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* function call for "complicated" and error cases */ 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; /* continue after the lead byte */ 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch=utf8_nextCharSafeBodyTerminated(&pSrc, ch); 523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch<0 && (++numSubstitutions, ch = subchar) < 0) { 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_INVALID_CHAR_FOUND; 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength += U16_LENGTH(ch); 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* srcLength >= 0 */ { 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *pSrcLimit = pSrc + srcLength; 532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t count; 533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. 
*/ 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Each iteration of the inner loop progresses by at most 3 UTF-8 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * bytes and one UChar, for most characters. 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For supplementary code points (4 & 2), which are rare, 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * there is an additional adjustment. 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count = (int32_t)(pDestLimit - pDest); 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcLength = (int32_t)((pSrcLimit - pSrc) / 3); 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count > srcLength) { 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count = srcLength; /* min(remaining dest, remaining src/3) */ 546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count < 3) { 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Too much overhead if we get near the end of the string, 550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * continue with the next loop. 
551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = *pSrc; 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch <= 0x7f){ 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(UChar)ch; 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch > 0xe0) { 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( /* handle U+1000..U+CFFF inline */ 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch <= 0xec && 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f && 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2); 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 3; 570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xe0) { 573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( /* handle U+0080..U+07FF inline */ 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch >= 0xc2 && 
575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1); 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 2; 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch >= 0xf0 || subchar > 0xffff) { 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * We may read up to six bytes and write up to two UChars, 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * which we didn't account for with computing count, 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * so we adjust it here. 
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(--count == 0) { 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* function call for "complicated" and error cases */ 595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; /* continue after the lead byte */ 596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch=utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch); 597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch<0 && (++numSubstitutions, ch = subchar) < 0){ 598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_INVALID_CHAR_FOUND; 599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }else if(ch<=0xFFFF){ 601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(pDest++)=(UChar)ch; 602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }else{ 603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(pDest++)=UTF16_LEAD(ch); 60450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *(pDest++)=UTF16_TRAIL(ch); 605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while(--count > 0); 608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((pSrc<pSrcLimit) && (pDest<pDestLimit)) { 611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = *pSrc; 
612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch <= 0x7f){ 613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(UChar)ch; 614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch > 0xe0) { 617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( /* handle U+1000..U+CFFF inline */ 618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch <= 0xec && 619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((pSrcLimit - pSrc) >= 3) && 620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f && 621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f 622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2); 625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 3; 626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xe0) { 629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( /* handle U+0080..U+07FF inline */ 630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch >= 0xc2 && 631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((pSrcLimit - pSrc) >= 2) && 632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f 633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 
634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1); 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 2; 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* function call for "complicated" and error cases */ 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; /* continue after the lead byte */ 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch=utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch); 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch<0 && (++numSubstitutions, ch = subchar) < 0){ 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_INVALID_CHAR_FOUND; 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }else if(ch<=0xFFFF){ 647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(pDest++)=(UChar)ch; 648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }else{ 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(pDest++)=UTF16_LEAD(ch); 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pDest<pDestLimit){ 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(pDest++)=UTF16_TRAIL(ch); 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }else{ 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength++; 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 65950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* do not fill the dest buffer just count the UChars needed */ 660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(pSrc < pSrcLimit){ 661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = *pSrc; 662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch <= 0x7f){ 663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength++; 664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch > 0xe0) { 667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( /* handle U+1000..U+CFFF inline */ 668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch <= 0xec && 669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((pSrcLimit - pSrc) >= 3) && 670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (uint8_t)(pSrc[1] - 0x80) <= 0x3f && 671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (uint8_t)(pSrc[2] - 0x80) <= 0x3f 672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength++; 674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 3; 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xe0) { 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( /* handle U+0080..U+07FF inline */ 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch >= 0xc2 && 680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((pSrcLimit - pSrc) >= 2) && 681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (uint8_t)(pSrc[1] - 0x80) <= 0x3f 
682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength++; 684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 2; 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* function call for "complicated" and error cases */ 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; /* continue after the lead byte */ 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch=utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch); 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch<0 && (++numSubstitutions, ch = subchar) < 0){ 693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_INVALID_CHAR_FOUND; 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength+=UTF_CHAR_LENGTH(ch); 697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength+=(int32_t)(pDest - dest); 702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pNumSubstitutions!=NULL) { 704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pNumSubstitutions=numSubstitutions; 705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pDestLength){ 708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDestLength = reqLength; 709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Terminate the buffer */ 712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_terminateUChars(dest,destCapacity,reqLength,pErrorCode); 713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return dest; 715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar* U_EXPORT2 718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strFromUTF8(UChar *dest, 719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destCapacity, 720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t *pDestLength, 721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* src, 722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcLength, 723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode){ 724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return u_strFromUTF8WithSub( 725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest, destCapacity, pDestLength, 726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, srcLength, 727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_SENTINEL, NULL, 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UChar * U_EXPORT2 732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strFromUTF8Lenient(UChar *dest, 733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destCapacity, 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t *pDestLength, 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *src, 736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcLength, 737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *pDest = dest; 739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 ch; 740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t reqLength = 0; 741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t* pSrc = (uint8_t*) src; 742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* args check */ 744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ 745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 74850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( (src==NULL && srcLength!=0) || srcLength < -1 || 74950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (destCapacity<0) || (dest == NULL && destCapacity > 0) 75050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(srcLength < 0) { 756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Transform a NUL-terminated string. */ 757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *pDestLimit = dest+destCapacity; 758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t t1, t2, t3; /* trail bytes */ 759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(((ch = *pSrc) != 0) && (pDest < pDestLimit)) { 761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch < 0xc0) { 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ASCII, or a trail byte in lead position which is treated like 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a single-byte sequence for better character boundary 765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * resynchronization after illegal sequences. 
766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(UChar)ch; 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xe0) { /* U+0080..U+07FF */ 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((t1 = pSrc[1]) != 0) { 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 0x3080 = (0xc0 << 6) + 0x80 */ 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = (UChar)((ch << 6) + t1 - 0x3080); 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 2; 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xf0) { /* U+0800..U+FFFF */ 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0) { 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 0x2080 = (0x80 << 6) + 0x80 */ 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = (UChar)((ch << 12) + (t1 << 6) + t2 - 0x2080); 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 3; 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* f0..f4 */ { /* U+10000..U+10FFFF */ 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0 && (t3 = pSrc[3]) != 0) { 
787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 4; 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = (ch << 18) + (t1 << 12) + (t2 << 6) + t3 - 0x3c82080; 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(pDest++) = U16_LEAD(ch); 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pDest < pDestLimit) { 792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(pDest++) = U16_TRAIL(ch); 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength = 1; 795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* truncated character at the end */ 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = 0xfffd; 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(*++pSrc != 0) {} 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Pre-flight the rest of the string. 
*/ 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((ch = *pSrc) != 0) { 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch < 0xc0) { 810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ASCII, or a trail byte in lead position which is treated like 812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a single-byte sequence for better character boundary 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * resynchronization after illegal sequences. 814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++reqLength; 816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xe0) { /* U+0080..U+07FF */ 819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pSrc[1] != 0) { 820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++reqLength; 821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 2; 822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xf0) { /* U+0800..U+FFFF */ 825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pSrc[1] != 0 && pSrc[2] != 0) { 826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++reqLength; 827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 3; 828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* f0..f4 */ { /* U+10000..U+10FFFF */ 
831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pSrc[1] != 0 && pSrc[2] != 0 && pSrc[3] != 0) { 832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength += 2; 833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 4; 834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* truncated character at the end */ 839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++reqLength; 840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* srcLength >= 0 */ { 843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *pSrcLimit = pSrc + srcLength; 844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This function requires that if srcLength is given, then it must be 847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * destCapatity >= srcLength so that we need not check for 848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * destination buffer overflow in the loop. 849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(destCapacity < srcLength) { 851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pDestLength != NULL) { 852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDestLength = srcLength; /* this likely overestimates the true destLength! 
*/ 853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_BUFFER_OVERFLOW_ERROR; 855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((pSrcLimit - pSrc) >= 4) { 859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrcLimit -= 3; /* temporarily reduce pSrcLimit */ 860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* in this loop, we can always access at least 4 bytes, up to pSrc+3 */ 862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = *pSrc++; 864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch < 0xc0) { 865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ASCII, or a trail byte in lead position which is treated like 867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a single-byte sequence for better character boundary 868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * resynchronization after illegal sequences. 
869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(UChar)ch; 871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xe0) { /* U+0080..U+07FF */ 872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 0x3080 = (0xc0 << 6) + 0x80 */ 873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080); 874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xf0) { /* U+0800..U+FFFF */ 875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 0x2080 = (0x80 << 6) + 0x80 */ 877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = (ch << 12) + (*pSrc++ << 6); 878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = (UChar)(ch + *pSrc++ - 0x2080); 879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* f0..f4 */ { /* U+10000..U+10FFFF */ 880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ 881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = (ch << 18) + (*pSrc++ << 12); 882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch += *pSrc++ << 6; 883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch += *pSrc++ - 0x3c82080; 884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(pDest++) = U16_LEAD(ch); 885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(pDest++) = U16_TRAIL(ch); 886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while(pSrc < pSrcLimit); 888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrcLimit += 3; /* restore original pSrcLimit */ 890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(pSrc < pSrcLimit) { 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = *pSrc++; 894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch < 0xc0) { 895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ASCII, or a trail byte in lead position which is treated like 897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a single-byte sequence for better character boundary 898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * resynchronization after illegal sequences. 899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(UChar)ch; 901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xe0) { /* U+0080..U+07FF */ 903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pSrc < pSrcLimit) { 904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 0x3080 = (0xc0 << 6) + 0x80 */ 905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080); 906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch < 0xf0) { /* U+0800..U+FFFF */ 909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((pSrcLimit - pSrc) >= 2) { 910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* no need for (ch & 0xf) because the upper bits are truncated after 
<<12 in the cast to (UChar) */ 911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 0x2080 = (0x80 << 6) + 0x80 */ 912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = (ch << 12) + (*pSrc++ << 6); 913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = (UChar)(ch + *pSrc++ - 0x2080); 914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 3; 915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* f0..f4 */ { /* U+10000..U+10FFFF */ 918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((pSrcLimit - pSrc) >= 3) { 919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ 920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = (ch << 18) + (*pSrc++ << 12); 921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch += *pSrc++ << 6; 922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch += *pSrc++ - 0x3c82080; 923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(pDest++) = U16_LEAD(ch); 924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *(pDest++) = U16_TRAIL(ch); 925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc += 4; 926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* truncated character at the end */ 931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++ = 0xfffd; 932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength+=(int32_t)(pDest - dest); 937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pDestLength){ 939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDestLength = reqLength; 940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Terminate the buffer */ 943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_terminateUChars(dest,destCapacity,reqLength,pErrorCode); 944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return dest; 946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic U_INLINE uint8_t * 949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru_appendUTF8(uint8_t *pDest, UChar32 c) { 950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Writes the UTF-8 form of c at pDest and returns the position just past the last byte written: one byte for c<=0x7f, two for c<=0x7ff, three for c<=0xffff, four otherwise. Nothing here checks c or the space available at pDest; it is 0<=c<=0x10ffff and not a surrogate if called by a validating function */ 951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((c)<=0x7f) { 952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)c; 953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(c<=0x7ff) { 954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((c>>6)|0xc0); 955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((c&0x3f)|0x80); 956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(c<=0xffff) { 957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
*pDest++=(uint8_t)((c>>12)|0xe0); 958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)(((c>>6)&0x3f)|0x80); 959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)(((c)&0x3f)|0x80); 960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* four-byte form for everything above 0xffff; range test left commented out: if((uint32_t)(c)<=0x10ffff) */ { 961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)(((c)>>18)|0xf0); 962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((((c)>>12)&0x3f)|0x80); 963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((((c)>>6)&0x3f)|0x80); 964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)(((c)&0x3f)|0x80); 965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return pDest; 967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI char* U_EXPORT2 971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strToUTF8WithSub(char *dest, 972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destCapacity, 973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t *pDestLength, 974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *pSrc, 975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcLength, 976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 subchar, int32_t *pNumSubstitutions, 977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode){ 978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t reqLength=0; 979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t ch=0,ch2=0; 
980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *pDest = (uint8_t *)dest; 981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *pDestLimit = pDest + destCapacity; 982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t numSubstitutions; 983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* args check */ 985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ 986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 98950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( (pSrc==NULL && srcLength!=0) || srcLength < -1 || 99050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (destCapacity<0) || (dest == NULL && destCapacity > 0) || 991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru subchar > 0x10ffff || U_IS_SURROGATE(subchar) 992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 997b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(pNumSubstitutions!=NULL) { 998b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru *pNumSubstitutions=0; 999b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru } 1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru numSubstitutions=0; 1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(srcLength==-1) { 1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
while((ch=*pSrc)!=0) { 1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch <= 0x7f) { 1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pDest<pDestLimit) { 100750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++ = (uint8_t)ch; 1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength = 1; 1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch <= 0x7ff) { 1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((pDestLimit - pDest) >= 2) { 1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch>>6)|0xc0); 1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch&0x3f)|0x80); 1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength = 2; 1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch <= 0xd7ff || ch >= 0xe000) { 1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((pDestLimit - pDest) >= 3) { 1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch>>12)|0xe0); 1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); 1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch&0x3f)|0x80); 1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength = 3; 
1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* ch is a surrogate */ { 1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length; 1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /*need not check for NUL because NUL fails UTF_IS_TRAIL() anyway*/ 1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_TRAIL(ch2=*pSrc)) { 1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch=UTF16_GET_PAIR_VALUE(ch, ch2); 1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(subchar>=0) { 1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch=subchar; 1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++numSubstitutions; 1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Unicode 3.2 forbids surrogate code points in UTF-8 */ 1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_INVALID_CHAR_FOUND; 1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length = U8_LENGTH(ch); 1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((pDestLimit - pDest) >= length) { 1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* convert and append*/ 1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pDest=_appendUTF8(pDest, ch); 1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 
1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength = length; 1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((ch=*pSrc++)!=0) { 1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch<=0x7f) { 1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++reqLength; 1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch<=0x7ff) { 1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength+=2; 1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(!UTF_IS_SURROGATE(ch)) { 1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength+=3; 1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_TRAIL(ch2=*pSrc)) { 1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength+=4; 1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(subchar>=0) { 1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength+=U8_LENGTH(subchar); 1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++numSubstitutions; 1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Unicode 3.2 forbids surrogate code points in UTF-8 */ 1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_INVALID_CHAR_FOUND; 1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *pSrcLimit = pSrc+srcLength; 1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t count; 1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */ 1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Each iteration of the inner loop progresses by at most 3 UTF-8 1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * bytes and one UChar, for most characters. 1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For supplementary code points (4 & 2), which are rare, 1084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * there is an additional adjustment. 1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count = (int32_t)((pDestLimit - pDest) / 3); 1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcLength = (int32_t)(pSrcLimit - pSrc); 1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count > srcLength) { 1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count = srcLength; /* min(remaining dest/3, remaining src) */ 1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count < 3) { 1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Too much overhead if we get near the end of the string, 1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * continue with the next loop. 
1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch=*pSrc++; 1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch <= 0x7f) { 110150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++ = (uint8_t)ch; 1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch <= 0x7ff) { 1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch>>6)|0xc0); 1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch&0x3f)|0x80); 1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch <= 0xd7ff || ch >= 0xe000) { 1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch>>12)|0xe0); 1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); 1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch&0x3f)|0x80); 1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* ch is a surrogate */ { 1110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 1111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * We will read two UChars and probably output four bytes, 1112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * which we didn't account for with computing count, 1113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * so we adjust it here. 
1114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(--count == 0) { 1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --pSrc; /* undo ch=*pSrc++ for the lead surrogate */ 1117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; /* recompute count */ 1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(UTF_IS_SURROGATE_FIRST(ch) && UTF_IS_TRAIL(ch2=*pSrc)) { 1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch=UTF16_GET_PAIR_VALUE(ch, ch2); 1123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* writing 4 bytes per 2 UChars is ok */ 1125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch>>18)|0xf0); 1126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)(((ch>>12)&0x3f)|0x80); 1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); 1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch&0x3f)|0x80); 1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Unicode 3.2 forbids surrogate code points in UTF-8 */ 1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(subchar>=0) { 1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch=subchar; 1133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++numSubstitutions; 1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_INVALID_CHAR_FOUND; 
1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* convert and append*/ 1140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pDest=_appendUTF8(pDest, ch); 1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while(--count > 0); 1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(pSrc<pSrcLimit) { 1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch=*pSrc++; 1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch <= 0x7f) { 1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pDest<pDestLimit) { 115050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++ = (uint8_t)ch; 1151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength = 1; 1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch <= 0x7ff) { 1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((pDestLimit - pDest) >= 2) { 1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch>>6)|0xc0); 1158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch&0x3f)|0x80); 1159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength = 2; 
1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch <= 0xd7ff || ch >= 0xe000) { 1164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((pDestLimit - pDest) >= 3) { 1165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch>>12)|0xe0); 1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); 1167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDest++=(uint8_t)((ch&0x3f)|0x80); 1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength = 3; 1170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* ch is a surrogate */ { 1173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length; 1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(UTF_IS_SURROGATE_FIRST(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) { 1176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 1177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch=UTF16_GET_PAIR_VALUE(ch, ch2); 1178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(subchar>=0) { 1179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch=subchar; 1180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++numSubstitutions; 1181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Unicode 3.2 forbids surrogate code points in UTF-8 */ 1183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
*pErrorCode = U_INVALID_CHAR_FOUND; 1184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 1185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length = U8_LENGTH(ch); 1188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((pDestLimit - pDest) >= length) { 1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* convert and append*/ 1190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pDest=_appendUTF8(pDest, ch); 1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength = length; 1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(pSrc<pSrcLimit) { 1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch=*pSrc++; 1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(ch<=0x7f) { 1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++reqLength; 1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(ch<=0x7ff) { 1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength+=2; 1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(!UTF_IS_SURROGATE(ch)) { 1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength+=3; 1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(UTF_IS_SURROGATE_FIRST(ch) && pSrc<pSrcLimit && UTF_IS_TRAIL(ch2=*pSrc)) { 1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pSrc; 1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
reqLength+=4; 1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(subchar>=0) { 1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength+=U8_LENGTH(subchar); 1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++numSubstitutions; 1211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Unicode 3.2 forbids surrogate code points in UTF-8 */ 1213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode = U_INVALID_CHAR_FOUND; 1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 1215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reqLength+=(int32_t)(pDest - (uint8_t *)dest); 1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pNumSubstitutions!=NULL) { 1222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pNumSubstitutions=numSubstitutions; 1223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pDestLength){ 1226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pDestLength = reqLength; 1227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Terminate the buffer */ 123050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_terminateChars(dest, destCapacity, reqLength, pErrorCode); 123150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return dest; 
1232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI char* U_EXPORT2 1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strToUTF8(char *dest, 1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destCapacity, 1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t *pDestLength, 1238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *pSrc, 1239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcLength, 1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode){ 1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return u_strToUTF8WithSub( 1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest, destCapacity, pDestLength, 1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pSrc, srcLength, 1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_SENTINEL, NULL, 1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 1246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 124750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 124850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI UChar* U_EXPORT2 124950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehou_strFromJavaModifiedUTF8WithSub( 125050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *dest, 125150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t destCapacity, 125250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t *pDestLength, 125350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const char *src, 125450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t srcLength, 125550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 subchar, int32_t *pNumSubstitutions, 125650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *pErrorCode) { 
125750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *pDest = dest; 125850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *pDestLimit = dest+destCapacity; 125950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 ch; 126050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t reqLength = 0; 126150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const uint8_t* pSrc = (const uint8_t*) src; 126250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const uint8_t *pSrcLimit; 126350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t count; 126450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint8_t t1, t2; /* trail bytes */ 126550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t numSubstitutions; 126650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 126750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* args check */ 126850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*pErrorCode)){ 126950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 127050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 127150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( (src==NULL && srcLength!=0) || srcLength < -1 || 127250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (dest==NULL && destCapacity!=0) || destCapacity<0 || 127350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho subchar > 0x10ffff || U_IS_SURROGATE(subchar) 127450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 127550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 127650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 127750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 127850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 127950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(pNumSubstitutions!=NULL) { 128050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pNumSubstitutions=0; 128150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 128250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho numSubstitutions=0; 
128350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 128450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(srcLength < 0) { 128550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 128650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Transform a NUL-terminated ASCII string. 128750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Handle non-ASCII strings with slower code. 128850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 128950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(((ch = *pSrc) != 0) && ch <= 0x7f && (pDest < pDestLimit)) { 129050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(UChar)ch; 129150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++pSrc; 129250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 129350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(ch == 0) { 129450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength=(int32_t)(pDest - dest); 129550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(pDestLength) { 129650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDestLength = reqLength; 129750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 129850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 129950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Terminate the buffer */ 130050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_terminateUChars(dest, destCapacity, reqLength, pErrorCode); 130150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return dest; 130250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 130350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcLength = uprv_strlen((const char *)pSrc); 130450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 130550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 130650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. 
*/ 130750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pSrcLimit = pSrc + srcLength; 130850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(;;) { 130950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho count = (int32_t)(pDestLimit - pDest); 131050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcLength = (int32_t)(pSrcLimit - pSrc); 131150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(count >= srcLength && srcLength > 0 && *pSrc <= 0x7f) { 131250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* fast ASCII loop */ 131350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const uint8_t *prevSrc = pSrc; 131450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t delta; 131550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(pSrc < pSrcLimit && (ch = *pSrc) <= 0x7f) { 131650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(UChar)ch; 131750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++pSrc; 131850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 131950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delta = (int32_t)(pSrc - prevSrc); 132050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho count -= delta; 132150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcLength -= delta; 132250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 132350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 132450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Each iteration of the inner loop progresses by at most 3 UTF-8 132550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * bytes and one UChar. 
132650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 132750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcLength /= 3; 132850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(count > srcLength) { 132950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho count = srcLength; /* min(remaining dest, remaining src/3) */ 133050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 133150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(count < 3) { 133250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 133350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Too much overhead if we get near the end of the string, 133450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * continue with the next loop. 133550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 133650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 133750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 133850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho do { 133950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ch = *pSrc; 134050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(ch <= 0x7f){ 134150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(UChar)ch; 134250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++pSrc; 134350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 134450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(ch >= 0xe0) { 134550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( /* handle U+0000..U+FFFF inline */ 134650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ch <= 0xef && 134750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f && 134850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f 134950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 135050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 135150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2); 
135250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pSrc += 3; 135350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 135450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 135550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 135650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( /* handle U+0000..U+07FF inline */ 135750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ch >= 0xc0 && 135850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f 135950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 136050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1); 136150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pSrc += 2; 136250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 136350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 136450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 136550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 136650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(subchar < 0) { 136750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pErrorCode = U_INVALID_CHAR_FOUND; 136850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 136950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(subchar > 0xffff && --count == 0) { 137050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 137150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * We need to write two UChars, adjusted count for that, 137250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and ran out of space. 
137350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 137450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 137550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 137650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* function call for error cases */ 137750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++pSrc; /* continue after the lead byte */ 137850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch); 137950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++numSubstitutions; 138050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(subchar<=0xFFFF) { 138150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *(pDest++)=(UChar)subchar; 138250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 138350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *(pDest++)=U16_LEAD(subchar); 138450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *(pDest++)=U16_TRAIL(subchar); 138550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 138650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 138750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 138850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } while(--count > 0); 138950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 139050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 139150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while((pSrc<pSrcLimit) && (pDest<pDestLimit)) { 139250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ch = *pSrc; 139350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(ch <= 0x7f){ 139450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(UChar)ch; 139550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++pSrc; 139650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 139750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(ch >= 0xe0) { 139850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( /* handle U+0000..U+FFFF inline */ 139950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ch <= 0xef && 140050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ((pSrcLimit - 
pSrc) >= 3) && 140150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f && 140250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (t2 = (uint8_t)(pSrc[2] - 0x80)) <= 0x3f 140350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 140450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ 140550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2); 140650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pSrc += 3; 140750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 140850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 140950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 141050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( /* handle U+0000..U+07FF inline */ 141150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ch >= 0xc0 && 141250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ((pSrcLimit - pSrc) >= 2) && 141350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (t1 = (uint8_t)(pSrc[1] - 0x80)) <= 0x3f 141450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 141550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1); 141650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pSrc += 2; 141750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 141850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 141950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 142050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 142150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(subchar < 0) { 142250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pErrorCode = U_INVALID_CHAR_FOUND; 142350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 142450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 142550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* function call for error cases */ 142650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++pSrc; /* continue after 
the lead byte */ 142750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch); 142850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++numSubstitutions; 142950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(subchar<=0xFFFF) { 143050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *(pDest++)=(UChar)subchar; 143150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 143250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *(pDest++)=U16_LEAD(subchar); 143350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(pDest<pDestLimit) { 143450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *(pDest++)=U16_TRAIL(subchar); 143550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 143650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength++; 143750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 143850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 143950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 144050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 144150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 144250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 144350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 144450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* do not fill the dest buffer just count the UChars needed */ 144550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(pSrc < pSrcLimit){ 144650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ch = *pSrc; 144750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(ch <= 0x7f) { 144850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength++; 144950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++pSrc; 145050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 145150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(ch >= 0xe0) { 145250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( /* handle U+0000..U+FFFF inline */ 145350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ch <= 0xef && 145450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ((pSrcLimit - pSrc) >= 3) && 
145550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (uint8_t)(pSrc[1] - 0x80) <= 0x3f && 145650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (uint8_t)(pSrc[2] - 0x80) <= 0x3f 145750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 145850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength++; 145950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pSrc += 3; 146050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 146150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 146250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 146350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( /* handle U+0000..U+07FF inline */ 146450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ch >= 0xc0 && 146550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ((pSrcLimit - pSrc) >= 2) && 146650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (uint8_t)(pSrc[1] - 0x80) <= 0x3f 146750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 146850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength++; 146950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pSrc += 2; 147050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho continue; 147150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 147250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 147350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 147450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(subchar < 0) { 147550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pErrorCode = U_INVALID_CHAR_FOUND; 147650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 147750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 147850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* function call for error cases */ 147950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++pSrc; /* continue after the lead byte */ 148050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho utf8_nextCharSafeBodyPointer(&pSrc, pSrcLimit, ch); 148150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++numSubstitutions; 148250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
reqLength+=U16_LENGTH(ch); 148350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 148450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 148550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 148650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 148750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(pNumSubstitutions!=NULL) { 148850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pNumSubstitutions=numSubstitutions; 148950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 149050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 149150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength+=(int32_t)(pDest - dest); 149250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(pDestLength) { 149350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDestLength = reqLength; 149450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 149550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 149650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Terminate the buffer */ 149750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_terminateUChars(dest, destCapacity, reqLength, pErrorCode); 149850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return dest; 149950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 150050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 150150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI char* U_EXPORT2 150250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehou_strToJavaModifiedUTF8( 150350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho char *dest, 150450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t destCapacity, 150550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t *pDestLength, 150650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *src, 150750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t srcLength, 150850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *pErrorCode) { 150950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t reqLength=0; 151027f654740f2a26ad62a5c155af9199af9e69b889claireho uint32_t ch=0; 
151150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint8_t *pDest = (uint8_t *)dest; 151250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho uint8_t *pDestLimit = pDest + destCapacity; 151350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *pSrcLimit; 151450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t count; 151550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 151650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* args check */ 151750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*pErrorCode)){ 151850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 151950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 152050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if( (src==NULL && srcLength!=0) || srcLength < -1 || 152150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho (dest==NULL && destCapacity!=0) || destCapacity<0 152250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ) { 152350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 152450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return NULL; 152550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 152650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 152750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(srcLength==-1) { 152850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Convert NUL-terminated ASCII, then find the string length. 
*/ 152950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while((ch=*src)<=0x7f && ch != 0 && pDest<pDestLimit) { 153050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++ = (uint8_t)ch; 153150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++src; 153250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 153350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(ch == 0) { 153450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength=(int32_t)(pDest - (uint8_t *)dest); 153550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(pDestLength) { 153650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDestLength = reqLength; 153750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 153850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 153950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Terminate the buffer */ 154050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_terminateChars(dest, destCapacity, reqLength, pErrorCode); 154150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return dest; 154250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 154350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcLength = u_strlen(src); 154450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 154550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 154650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. 
*/ 154750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pSrcLimit = src+srcLength; 154850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(;;) { 154950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho count = (int32_t)(pDestLimit - pDest); 155050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcLength = (int32_t)(pSrcLimit - src); 155150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(count >= srcLength && srcLength > 0 && *src <= 0x7f) { 155250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* fast ASCII loop */ 155350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *prevSrc = src; 155450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t delta; 155550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(src < pSrcLimit && (ch = *src) <= 0x7f && ch != 0) { 155650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(uint8_t)ch; 155750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++src; 155850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 155950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delta = (int32_t)(src - prevSrc); 156050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho count -= delta; 156150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcLength -= delta; 156250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 156350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 156450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Each iteration of the inner loop progresses by at most 3 UTF-8 156550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * bytes and one UChar. 
156650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 156750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho count /= 3; 156850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(count > srcLength) { 156950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho count = srcLength; /* min(remaining dest/3, remaining src) */ 157050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 157150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(count < 3) { 157250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 157350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Too much overhead if we get near the end of the string, 157450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * continue with the next loop. 157550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 157650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 157750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 157850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho do { 157950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ch=*src++; 158050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(ch <= 0x7f && ch != 0) { 158150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++ = (uint8_t)ch; 158250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(ch <= 0x7ff) { 158350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(uint8_t)((ch>>6)|0xc0); 158450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(uint8_t)((ch&0x3f)|0x80); 158550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 158650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(uint8_t)((ch>>12)|0xe0); 158750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); 158850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(uint8_t)((ch&0x3f)|0x80); 158950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 159050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } while(--count > 0); 159150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 159250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
159350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(src<pSrcLimit) { 159450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ch=*src++; 159550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(ch <= 0x7f && ch != 0) { 159650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(pDest<pDestLimit) { 159750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++ = (uint8_t)ch; 159850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 159950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength = 1; 160050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 160150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 160250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(ch <= 0x7ff) { 160350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if((pDestLimit - pDest) >= 2) { 160450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(uint8_t)((ch>>6)|0xc0); 160550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(uint8_t)((ch&0x3f)|0x80); 160650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 160750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength = 2; 160850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 160950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 161050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 161150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if((pDestLimit - pDest) >= 3) { 161250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(uint8_t)((ch>>12)|0xe0); 161350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); 161450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDest++=(uint8_t)((ch&0x3f)|0x80); 161550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 161650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength = 3; 161750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 161850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 161950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 162050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 
162150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(src<pSrcLimit) { 162250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ch=*src++; 162350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(ch <= 0x7f && ch != 0) { 162450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ++reqLength; 162550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else if(ch<=0x7ff) { 162650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength+=2; 162750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 162850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength+=3; 162950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 163050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 163150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 163250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho reqLength+=(int32_t)(pDest - (uint8_t *)dest); 163350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(pDestLength){ 163450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pDestLength = reqLength; 163550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 163650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 163750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* Terminate the buffer */ 163850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_terminateChars(dest, destCapacity, reqLength, pErrorCode); 163950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return dest; 164050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 1641