1// Copyright (C) 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/* 4******************************************************************************* 5* 6* Copyright (C) 2001-2012, International Business Machines 7* Corporation and others. All Rights Reserved. 8* 9******************************************************************************* 10* file name: ustr_wcs.cpp 11* encoding: US-ASCII 12* tab size: 8 (not used) 13* indentation:4 14* 15* created on: 2004sep07 16* created by: Markus W. Scherer 17* 18* u_strToWCS() and u_strFromWCS() functions 19* moved here from ustrtrns.c for better modularization. 20*/ 21 22#include "unicode/utypes.h" 23#include "unicode/ustring.h" 24#include "cstring.h" 25#include "cwchar.h" 26#include "cmemory.h" 27#include "ustr_imp.h" 28#include "ustr_cnv.h" 29 30#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION 31 32#define _STACK_BUFFER_CAPACITY 1000 33#define _BUFFER_CAPACITY_MULTIPLIER 2 34 35#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) 36// TODO: We should use CharString for char buffers and UnicodeString for UChar buffers. 37// Then we could change this to work only with wchar_t buffers. 38static inline UBool 39u_growAnyBufferFromStatic(void *context, 40 void **pBuffer, int32_t *pCapacity, int32_t reqCapacity, 41 int32_t length, int32_t size) { 42 // Use char* not void* to avoid the compiler's strict-aliasing assumptions 43 // and related warnings. 44 char *newBuffer=(char *)uprv_malloc(reqCapacity*size); 45 if(newBuffer!=NULL) { 46 if(length>0) { 47 uprv_memcpy(newBuffer, *pBuffer, (size_t)length*size); 48 } 49 *pCapacity=reqCapacity; 50 } else { 51 *pCapacity=0; 52 } 53 54 /* release the old pBuffer if it was not statically allocated */ 55 if(*pBuffer!=(char *)context) { 56 uprv_free(*pBuffer); 57 } 58 59 *pBuffer=newBuffer; 60 return (UBool)(newBuffer!=NULL); 61} 62 63/* helper function */ 64static wchar_t* 65_strToWCS(wchar_t *dest, 66 int32_t destCapacity, 67 int32_t *pDestLength, 68 const UChar *src, 69 int32_t srcLength, 70 UErrorCode *pErrorCode){ 71 72 char stackBuffer [_STACK_BUFFER_CAPACITY]; 73 char* tempBuf = stackBuffer; 74 int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY; 75 char* tempBufLimit = stackBuffer + tempBufCapacity; 76 UConverter* conv = NULL; 77 char* saveBuf = tempBuf; 78 wchar_t* intTarget=NULL; 79 int32_t intTargetCapacity=0; 80 int count=0,retVal=0; 81 82 const UChar *pSrcLimit =NULL; 83 const UChar *pSrc = src; 84 85 conv = u_getDefaultConverter(pErrorCode); 86 87 if(U_FAILURE(*pErrorCode)){ 88 return NULL; 89 } 90 91 if(srcLength == -1){ 92 srcLength = u_strlen(pSrc); 93 } 94 95 pSrcLimit = pSrc + srcLength; 96 97 for(;;) { 98 /* reset the error state */ 99 *pErrorCode = U_ZERO_ERROR; 100 101 /* convert to chars using default converter */ 102 ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode); 103 count =(tempBuf - saveBuf); 104 105 /* This should rarely occur */ 106 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){ 107 tempBuf = saveBuf; 108 109 /* we dont have enough room on the stack grow the buffer */ 110 int32_t newCapacity = 2 * srcLength; 111 if(newCapacity <= tempBufCapacity) { 112 newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity; 113 } 114 if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, 115 newCapacity, count, 1)) { 116 goto cleanup; 117 } 118 119 saveBuf = tempBuf; 120 tempBufLimit = tempBuf + tempBufCapacity; 121 tempBuf = tempBuf + count; 122 123 } else { 124 break; 125 } 126 } 127 128 if(U_FAILURE(*pErrorCode)){ 129 goto cleanup; 130 } 131 132 /* done with conversion null terminate the char buffer */ 133 if(count>=tempBufCapacity){ 134 tempBuf = saveBuf; 135 /* we dont have enough room on the stack grow the buffer */ 136 if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, 137 count+1, count, 1)) { 138 goto cleanup; 139 } 140 saveBuf = tempBuf; 141 } 142 143 saveBuf[count]=0; 144 145 146 /* allocate more space than required 147 * here we assume that every char requires 148 * no more than 2 wchar_ts 149 */ 150 intTargetCapacity = (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */; 151 intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) ); 152 153 if(intTarget){ 154 155 int32_t nulLen = 0; 156 int32_t remaining = intTargetCapacity; 157 wchar_t* pIntTarget=intTarget; 158 tempBuf = saveBuf; 159 160 /* now convert the mbs to wcs */ 161 for(;;){ 162 163 /* we can call the system API since we are sure that 164 * there is atleast 1 null in the input 165 */ 166 retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining); 167 168 if(retVal==-1){ 169 *pErrorCode = U_INVALID_CHAR_FOUND; 170 break; 171 }else if(retVal== remaining){/* should never occur */ 172 int numWritten = (pIntTarget-intTarget); 173 u_growAnyBufferFromStatic(NULL,(void**) &intTarget, 174 &intTargetCapacity, 175 intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER, 176 numWritten, 177 sizeof(wchar_t)); 178 pIntTarget = intTarget; 179 remaining=intTargetCapacity; 180 181 if(nulLen!=count){ /*there are embedded nulls*/ 182 pIntTarget+=numWritten; 183 remaining-=numWritten; 184 } 185 186 }else{ 187 int32_t nulVal; 188 /*scan for nulls */ 189 /* we donot check for limit since tempBuf is null terminated */ 190 while(tempBuf[nulLen++] != 0){ 191 } 192 nulVal = (nulLen < srcLength) ? 1 : 0; 193 pIntTarget = pIntTarget + retVal+nulVal; 194 remaining -=(retVal+nulVal); 195 196 /* check if we have reached the source limit*/ 197 if(nulLen>=(count)){ 198 break; 199 } 200 } 201 } 202 count = (int32_t)(pIntTarget-intTarget); 203 204 if(0 < count && count <= destCapacity){ 205 uprv_memcpy(dest, intTarget, (size_t)count*sizeof(wchar_t)); 206 } 207 208 if(pDestLength){ 209 *pDestLength = count; 210 } 211 212 /* free the allocated memory */ 213 uprv_free(intTarget); 214 215 }else{ 216 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 217 } 218cleanup: 219 /* are we still using stack buffer */ 220 if(stackBuffer != saveBuf){ 221 uprv_free(saveBuf); 222 } 223 u_terminateWChars(dest,destCapacity,count,pErrorCode); 224 225 u_releaseDefaultConverter(conv); 226 227 return dest; 228} 229#endif 230 231U_CAPI wchar_t* U_EXPORT2 232u_strToWCS(wchar_t *dest, 233 int32_t destCapacity, 234 int32_t *pDestLength, 235 const UChar *src, 236 int32_t srcLength, 237 UErrorCode *pErrorCode){ 238 239 /* args check */ 240 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ 241 return NULL; 242 } 243 244 if( (src==NULL && srcLength!=0) || srcLength < -1 || 245 (destCapacity<0) || (dest == NULL && destCapacity > 0) 246 ) { 247 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 248 return NULL; 249 } 250 251#ifdef U_WCHAR_IS_UTF16 252 /* wchar_t is UTF-16 just do a memcpy */ 253 if(srcLength == -1){ 254 srcLength = u_strlen(src); 255 } 256 if(0 < srcLength && srcLength <= destCapacity){ 257 u_memcpy(dest, src, srcLength); 258 } 259 if(pDestLength){ 260 *pDestLength = srcLength; 261 } 262 263 u_terminateUChars((UChar *)dest,destCapacity,srcLength,pErrorCode); 264 265 return dest; 266 267#elif defined U_WCHAR_IS_UTF32 268 269 return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength, 270 src, srcLength, pErrorCode); 271 272#else 273 274 return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode); 275 276#endif 277 278} 279 280#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) 281/* helper function */ 282static UChar* 283_strFromWCS( UChar *dest, 284 int32_t destCapacity, 285 int32_t *pDestLength, 286 const wchar_t *src, 287 int32_t srcLength, 288 UErrorCode *pErrorCode) 289{ 290 int32_t retVal =0, count =0 ; 291 UConverter* conv = NULL; 292 UChar* pTarget = NULL; 293 UChar* pTargetLimit = NULL; 294 UChar* target = NULL; 295 296 UChar uStack [_STACK_BUFFER_CAPACITY]; 297 298 wchar_t wStack[_STACK_BUFFER_CAPACITY]; 299 wchar_t* pWStack = wStack; 300 301 302 char cStack[_STACK_BUFFER_CAPACITY]; 303 int32_t cStackCap = _STACK_BUFFER_CAPACITY; 304 char* pCSrc=cStack; 305 char* pCSave=pCSrc; 306 char* pCSrcLimit=NULL; 307 308 const wchar_t* pSrc = src; 309 const wchar_t* pSrcLimit = NULL; 310 311 if(srcLength ==-1){ 312 /* if the wchar_t source is null terminated we can safely 313 * assume that there are no embedded nulls, this is a fast 314 * path for null terminated strings. 315 */ 316 for(;;){ 317 /* convert wchars to chars */ 318 retVal = uprv_wcstombs(pCSrc,src, cStackCap); 319 320 if(retVal == -1){ 321 *pErrorCode = U_ILLEGAL_CHAR_FOUND; 322 goto cleanup; 323 }else if(retVal >= (cStackCap-1)){ 324 /* Should rarely occur */ 325 u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, 326 cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char)); 327 pCSave = pCSrc; 328 }else{ 329 /* converted every thing */ 330 pCSrc = pCSrc+retVal; 331 break; 332 } 333 } 334 335 }else{ 336 /* here the source is not null terminated 337 * so it may have nulls embeded and we need to 338 * do some extra processing 339 */ 340 int32_t remaining =cStackCap; 341 342 pSrcLimit = src + srcLength; 343 344 for(;;){ 345 register int32_t nulLen = 0; 346 347 /* find nulls in the string */ 348 while(nulLen<srcLength && pSrc[nulLen++]!=0){ 349 } 350 351 if((pSrc+nulLen) < pSrcLimit){ 352 /* check if we have enough room in pCSrc */ 353 if(remaining < (nulLen * MB_CUR_MAX)){ 354 /* should rarely occur */ 355 int32_t len = (pCSrc-pCSave); 356 pCSrc = pCSave; 357 /* we do not have enough room so grow the buffer*/ 358 u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, 359 _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char)); 360 361 pCSave = pCSrc; 362 pCSrc = pCSave+len; 363 remaining = cStackCap-(pCSrc - pCSave); 364 } 365 366 /* we have found a null so convert the 367 * chunk from begining of non-null char to null 368 */ 369 retVal = uprv_wcstombs(pCSrc,pSrc,remaining); 370 371 if(retVal==-1){ 372 /* an error occurred bail out */ 373 *pErrorCode = U_ILLEGAL_CHAR_FOUND; 374 goto cleanup; 375 } 376 377 pCSrc += retVal+1 /* already null terminated */; 378 379 pSrc += nulLen; /* skip past the null */ 380 srcLength-=nulLen; /* decrement the srcLength */ 381 remaining -= (pCSrc-pCSave); 382 383 384 }else{ 385 /* the source is not null terminated and we are 386 * end of source so we copy the source to a temp buffer 387 * null terminate it and convert wchar_ts to chars 388 */ 389 if(nulLen >= _STACK_BUFFER_CAPACITY){ 390 /* Should rarely occcur */ 391 /* allocate new buffer buffer */ 392 pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1)); 393 if(pWStack==NULL){ 394 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 395 goto cleanup; 396 } 397 } 398 if(nulLen>0){ 399 /* copy the contents to tempStack */ 400 uprv_memcpy(pWStack, pSrc, (size_t)nulLen*sizeof(wchar_t)); 401 } 402 403 /* null terminate the tempBuffer */ 404 pWStack[nulLen] =0 ; 405 406 if(remaining < (nulLen * MB_CUR_MAX)){ 407 /* Should rarely occur */ 408 int32_t len = (pCSrc-pCSave); 409 pCSrc = pCSave; 410 /* we do not have enough room so grow the buffer*/ 411 u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, 412 cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char)); 413 414 pCSave = pCSrc; 415 pCSrc = pCSave+len; 416 remaining = cStackCap-(pCSrc - pCSave); 417 } 418 /* convert to chars */ 419 retVal = uprv_wcstombs(pCSrc,pWStack,remaining); 420 421 pCSrc += retVal; 422 pSrc += nulLen; 423 srcLength-=nulLen; /* decrement the srcLength */ 424 break; 425 } 426 } 427 } 428 429 /* OK..now we have converted from wchar_ts to chars now 430 * convert chars to UChars 431 */ 432 pCSrcLimit = pCSrc; 433 pCSrc = pCSave; 434 pTarget = target= dest; 435 pTargetLimit = dest + destCapacity; 436 437 conv= u_getDefaultConverter(pErrorCode); 438 439 if(U_FAILURE(*pErrorCode)|| conv==NULL){ 440 goto cleanup; 441 } 442 443 for(;;) { 444 445 *pErrorCode = U_ZERO_ERROR; 446 447 /* convert to stack buffer*/ 448 ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode); 449 450 /* increment count to number written to stack */ 451 count+= pTarget - target; 452 453 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){ 454 target = uStack; 455 pTarget = uStack; 456 pTargetLimit = uStack + _STACK_BUFFER_CAPACITY; 457 } else { 458 break; 459 } 460 461 } 462 463 if(pDestLength){ 464 *pDestLength =count; 465 } 466 467 u_terminateUChars(dest,destCapacity,count,pErrorCode); 468 469cleanup: 470 471 if(cStack != pCSave){ 472 uprv_free(pCSave); 473 } 474 475 if(wStack != pWStack){ 476 uprv_free(pWStack); 477 } 478 479 u_releaseDefaultConverter(conv); 480 481 return dest; 482} 483#endif 484 485U_CAPI UChar* U_EXPORT2 486u_strFromWCS(UChar *dest, 487 int32_t destCapacity, 488 int32_t *pDestLength, 489 const wchar_t *src, 490 int32_t srcLength, 491 UErrorCode *pErrorCode) 492{ 493 494 /* args check */ 495 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ 496 return NULL; 497 } 498 499 if( (src==NULL && srcLength!=0) || srcLength < -1 || 500 (destCapacity<0) || (dest == NULL && destCapacity > 0) 501 ) { 502 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 503 return NULL; 504 } 505 506#ifdef U_WCHAR_IS_UTF16 507 /* wchar_t is UTF-16 just do a memcpy */ 508 if(srcLength == -1){ 509 srcLength = u_strlen((const UChar *)src); 510 } 511 if(0 < srcLength && srcLength <= destCapacity){ 512 u_memcpy(dest, src, srcLength); 513 } 514 if(pDestLength){ 515 *pDestLength = srcLength; 516 } 517 518 u_terminateUChars(dest,destCapacity,srcLength,pErrorCode); 519 520 return dest; 521 522#elif defined U_WCHAR_IS_UTF32 523 524 return u_strFromUTF32(dest, destCapacity, pDestLength, 525 (UChar32*)src, srcLength, pErrorCode); 526 527#else 528 529 return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode); 530 531#endif 532 533} 534 535#endif /* #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) && !UCONFIG_NO_CONVERSION */ 536