1/* 2******************************************************************************* 3* 4* Copyright (C) 2001-2012, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7******************************************************************************* 8* file name: ustr_wcs.cpp 9* encoding: US-ASCII 10* tab size: 8 (not used) 11* indentation:4 12* 13* created on: 2004sep07 14* created by: Markus W. Scherer 15* 16* u_strToWCS() and u_strFromWCS() functions 17* moved here from ustrtrns.c for better modularization. 18*/ 19 20#include "unicode/utypes.h" 21#include "unicode/ustring.h" 22#include "cstring.h" 23#include "cwchar.h" 24#include "cmemory.h" 25#include "ustr_imp.h" 26#include "ustr_cnv.h" 27 28#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION 29 30#define _STACK_BUFFER_CAPACITY 1000 31#define _BUFFER_CAPACITY_MULTIPLIER 2 32 33#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) 34// TODO: We should use CharString for char buffers and UnicodeString for UChar buffers. 35// Then we could change this to work only with wchar_t buffers. 36static inline UBool 37u_growAnyBufferFromStatic(void *context, 38 void **pBuffer, int32_t *pCapacity, int32_t reqCapacity, 39 int32_t length, int32_t size) { 40 // Use char* not void* to avoid the compiler's strict-aliasing assumptions 41 // and related warnings. 42 char *newBuffer=(char *)uprv_malloc(reqCapacity*size); 43 if(newBuffer!=NULL) { 44 if(length>0) { 45 uprv_memcpy(newBuffer, *pBuffer, length*size); 46 } 47 *pCapacity=reqCapacity; 48 } else { 49 *pCapacity=0; 50 } 51 52 /* release the old pBuffer if it was not statically allocated */ 53 if(*pBuffer!=(char *)context) { 54 uprv_free(*pBuffer); 55 } 56 57 *pBuffer=newBuffer; 58 return (UBool)(newBuffer!=NULL); 59} 60 61/* helper function */ 62static wchar_t* 63_strToWCS(wchar_t *dest, 64 int32_t destCapacity, 65 int32_t *pDestLength, 66 const UChar *src, 67 int32_t srcLength, 68 UErrorCode *pErrorCode){ 69 70 char stackBuffer [_STACK_BUFFER_CAPACITY]; 71 char* tempBuf = stackBuffer; 72 int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY; 73 char* tempBufLimit = stackBuffer + tempBufCapacity; 74 UConverter* conv = NULL; 75 char* saveBuf = tempBuf; 76 wchar_t* intTarget=NULL; 77 int32_t intTargetCapacity=0; 78 int count=0,retVal=0; 79 80 const UChar *pSrcLimit =NULL; 81 const UChar *pSrc = src; 82 83 conv = u_getDefaultConverter(pErrorCode); 84 85 if(U_FAILURE(*pErrorCode)){ 86 return NULL; 87 } 88 89 if(srcLength == -1){ 90 srcLength = u_strlen(pSrc); 91 } 92 93 pSrcLimit = pSrc + srcLength; 94 95 for(;;) { 96 /* reset the error state */ 97 *pErrorCode = U_ZERO_ERROR; 98 99 /* convert to chars using default converter */ 100 ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode); 101 count =(tempBuf - saveBuf); 102 103 /* This should rarely occur */ 104 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){ 105 tempBuf = saveBuf; 106 107 /* we dont have enough room on the stack grow the buffer */ 108 int32_t newCapacity = 2 * srcLength; 109 if(newCapacity <= tempBufCapacity) { 110 newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity; 111 } 112 if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, 113 newCapacity, count, 1)) { 114 goto cleanup; 115 } 116 117 saveBuf = tempBuf; 118 tempBufLimit = tempBuf + tempBufCapacity; 119 tempBuf = tempBuf + count; 120 121 } else { 122 break; 123 } 124 } 125 126 if(U_FAILURE(*pErrorCode)){ 127 goto cleanup; 128 } 129 130 /* done with conversion null terminate the char buffer */ 131 if(count>=tempBufCapacity){ 132 tempBuf = saveBuf; 133 /* we dont have enough room on the stack grow the buffer */ 134 if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, 135 count+1, count, 1)) { 136 goto cleanup; 137 } 138 saveBuf = tempBuf; 139 } 140 141 saveBuf[count]=0; 142 143 144 /* allocate more space than required 145 * here we assume that every char requires 146 * no more than 2 wchar_ts 147 */ 148 intTargetCapacity = (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */; 149 intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) ); 150 151 if(intTarget){ 152 153 int32_t nulLen = 0; 154 int32_t remaining = intTargetCapacity; 155 wchar_t* pIntTarget=intTarget; 156 tempBuf = saveBuf; 157 158 /* now convert the mbs to wcs */ 159 for(;;){ 160 161 /* we can call the system API since we are sure that 162 * there is atleast 1 null in the input 163 */ 164 retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining); 165 166 if(retVal==-1){ 167 *pErrorCode = U_INVALID_CHAR_FOUND; 168 break; 169 }else if(retVal== remaining){/* should never occur */ 170 int numWritten = (pIntTarget-intTarget); 171 u_growAnyBufferFromStatic(NULL,(void**) &intTarget, 172 &intTargetCapacity, 173 intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER, 174 numWritten, 175 sizeof(wchar_t)); 176 pIntTarget = intTarget; 177 remaining=intTargetCapacity; 178 179 if(nulLen!=count){ /*there are embedded nulls*/ 180 pIntTarget+=numWritten; 181 remaining-=numWritten; 182 } 183 184 }else{ 185 int32_t nulVal; 186 /*scan for nulls */ 187 /* we donot check for limit since tempBuf is null terminated */ 188 while(tempBuf[nulLen++] != 0){ 189 } 190 nulVal = (nulLen < srcLength) ? 1 : 0; 191 pIntTarget = pIntTarget + retVal+nulVal; 192 remaining -=(retVal+nulVal); 193 194 /* check if we have reached the source limit*/ 195 if(nulLen>=(count)){ 196 break; 197 } 198 } 199 } 200 count = (int32_t)(pIntTarget-intTarget); 201 202 if(0 < count && count <= destCapacity){ 203 uprv_memcpy(dest,intTarget,count*sizeof(wchar_t)); 204 } 205 206 if(pDestLength){ 207 *pDestLength = count; 208 } 209 210 /* free the allocated memory */ 211 uprv_free(intTarget); 212 213 }else{ 214 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 215 } 216cleanup: 217 /* are we still using stack buffer */ 218 if(stackBuffer != saveBuf){ 219 uprv_free(saveBuf); 220 } 221 u_terminateWChars(dest,destCapacity,count,pErrorCode); 222 223 u_releaseDefaultConverter(conv); 224 225 return dest; 226} 227#endif 228 229U_CAPI wchar_t* U_EXPORT2 230u_strToWCS(wchar_t *dest, 231 int32_t destCapacity, 232 int32_t *pDestLength, 233 const UChar *src, 234 int32_t srcLength, 235 UErrorCode *pErrorCode){ 236 237 /* args check */ 238 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ 239 return NULL; 240 } 241 242 if( (src==NULL && srcLength!=0) || srcLength < -1 || 243 (destCapacity<0) || (dest == NULL && destCapacity > 0) 244 ) { 245 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 246 return NULL; 247 } 248 249#ifdef U_WCHAR_IS_UTF16 250 /* wchar_t is UTF-16 just do a memcpy */ 251 if(srcLength == -1){ 252 srcLength = u_strlen(src); 253 } 254 if(0 < srcLength && srcLength <= destCapacity){ 255 uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR); 256 } 257 if(pDestLength){ 258 *pDestLength = srcLength; 259 } 260 261 u_terminateUChars(dest,destCapacity,srcLength,pErrorCode); 262 263 return dest; 264 265#elif defined U_WCHAR_IS_UTF32 266 267 return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength, 268 src, srcLength, pErrorCode); 269 270#else 271 272 return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode); 273 274#endif 275 276} 277 278#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) 279/* helper function */ 280static UChar* 281_strFromWCS( UChar *dest, 282 int32_t destCapacity, 283 int32_t *pDestLength, 284 const wchar_t *src, 285 int32_t srcLength, 286 UErrorCode *pErrorCode) 287{ 288 int32_t retVal =0, count =0 ; 289 UConverter* conv = NULL; 290 UChar* pTarget = NULL; 291 UChar* pTargetLimit = NULL; 292 UChar* target = NULL; 293 294 UChar uStack [_STACK_BUFFER_CAPACITY]; 295 296 wchar_t wStack[_STACK_BUFFER_CAPACITY]; 297 wchar_t* pWStack = wStack; 298 299 300 char cStack[_STACK_BUFFER_CAPACITY]; 301 int32_t cStackCap = _STACK_BUFFER_CAPACITY; 302 char* pCSrc=cStack; 303 char* pCSave=pCSrc; 304 char* pCSrcLimit=NULL; 305 306 const wchar_t* pSrc = src; 307 const wchar_t* pSrcLimit = NULL; 308 309 if(srcLength ==-1){ 310 /* if the wchar_t source is null terminated we can safely 311 * assume that there are no embedded nulls, this is a fast 312 * path for null terminated strings. 313 */ 314 for(;;){ 315 /* convert wchars to chars */ 316 retVal = uprv_wcstombs(pCSrc,src, cStackCap); 317 318 if(retVal == -1){ 319 *pErrorCode = U_ILLEGAL_CHAR_FOUND; 320 goto cleanup; 321 }else if(retVal >= (cStackCap-1)){ 322 /* Should rarely occur */ 323 u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, 324 cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char)); 325 pCSave = pCSrc; 326 }else{ 327 /* converted every thing */ 328 pCSrc = pCSrc+retVal; 329 break; 330 } 331 } 332 333 }else{ 334 /* here the source is not null terminated 335 * so it may have nulls embeded and we need to 336 * do some extra processing 337 */ 338 int32_t remaining =cStackCap; 339 340 pSrcLimit = src + srcLength; 341 342 for(;;){ 343 register int32_t nulLen = 0; 344 345 /* find nulls in the string */ 346 while(nulLen<srcLength && pSrc[nulLen++]!=0){ 347 } 348 349 if((pSrc+nulLen) < pSrcLimit){ 350 /* check if we have enough room in pCSrc */ 351 if(remaining < (nulLen * MB_CUR_MAX)){ 352 /* should rarely occur */ 353 int32_t len = (pCSrc-pCSave); 354 pCSrc = pCSave; 355 /* we do not have enough room so grow the buffer*/ 356 u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, 357 _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char)); 358 359 pCSave = pCSrc; 360 pCSrc = pCSave+len; 361 remaining = cStackCap-(pCSrc - pCSave); 362 } 363 364 /* we have found a null so convert the 365 * chunk from begining of non-null char to null 366 */ 367 retVal = uprv_wcstombs(pCSrc,pSrc,remaining); 368 369 if(retVal==-1){ 370 /* an error occurred bail out */ 371 *pErrorCode = U_ILLEGAL_CHAR_FOUND; 372 goto cleanup; 373 } 374 375 pCSrc += retVal+1 /* already null terminated */; 376 377 pSrc += nulLen; /* skip past the null */ 378 srcLength-=nulLen; /* decrement the srcLength */ 379 remaining -= (pCSrc-pCSave); 380 381 382 }else{ 383 /* the source is not null terminated and we are 384 * end of source so we copy the source to a temp buffer 385 * null terminate it and convert wchar_ts to chars 386 */ 387 if(nulLen >= _STACK_BUFFER_CAPACITY){ 388 /* Should rarely occcur */ 389 /* allocate new buffer buffer */ 390 pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1)); 391 if(pWStack==NULL){ 392 *pErrorCode = U_MEMORY_ALLOCATION_ERROR; 393 goto cleanup; 394 } 395 } 396 if(nulLen>0){ 397 /* copy the contents to tempStack */ 398 uprv_memcpy(pWStack,pSrc,nulLen*sizeof(wchar_t)); 399 } 400 401 /* null terminate the tempBuffer */ 402 pWStack[nulLen] =0 ; 403 404 if(remaining < (nulLen * MB_CUR_MAX)){ 405 /* Should rarely occur */ 406 int32_t len = (pCSrc-pCSave); 407 pCSrc = pCSave; 408 /* we do not have enough room so grow the buffer*/ 409 u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, 410 cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char)); 411 412 pCSave = pCSrc; 413 pCSrc = pCSave+len; 414 remaining = cStackCap-(pCSrc - pCSave); 415 } 416 /* convert to chars */ 417 retVal = uprv_wcstombs(pCSrc,pWStack,remaining); 418 419 pCSrc += retVal; 420 pSrc += nulLen; 421 srcLength-=nulLen; /* decrement the srcLength */ 422 break; 423 } 424 } 425 } 426 427 /* OK..now we have converted from wchar_ts to chars now 428 * convert chars to UChars 429 */ 430 pCSrcLimit = pCSrc; 431 pCSrc = pCSave; 432 pTarget = target= dest; 433 pTargetLimit = dest + destCapacity; 434 435 conv= u_getDefaultConverter(pErrorCode); 436 437 if(U_FAILURE(*pErrorCode)|| conv==NULL){ 438 goto cleanup; 439 } 440 441 for(;;) { 442 443 *pErrorCode = U_ZERO_ERROR; 444 445 /* convert to stack buffer*/ 446 ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode); 447 448 /* increment count to number written to stack */ 449 count+= pTarget - target; 450 451 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){ 452 target = uStack; 453 pTarget = uStack; 454 pTargetLimit = uStack + _STACK_BUFFER_CAPACITY; 455 } else { 456 break; 457 } 458 459 } 460 461 if(pDestLength){ 462 *pDestLength =count; 463 } 464 465 u_terminateUChars(dest,destCapacity,count,pErrorCode); 466 467cleanup: 468 469 if(cStack != pCSave){ 470 uprv_free(pCSave); 471 } 472 473 if(wStack != pWStack){ 474 uprv_free(pWStack); 475 } 476 477 u_releaseDefaultConverter(conv); 478 479 return dest; 480} 481#endif 482 483U_CAPI UChar* U_EXPORT2 484u_strFromWCS(UChar *dest, 485 int32_t destCapacity, 486 int32_t *pDestLength, 487 const wchar_t *src, 488 int32_t srcLength, 489 UErrorCode *pErrorCode) 490{ 491 492 /* args check */ 493 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ 494 return NULL; 495 } 496 497 if( (src==NULL && srcLength!=0) || srcLength < -1 || 498 (destCapacity<0) || (dest == NULL && destCapacity > 0) 499 ) { 500 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; 501 return NULL; 502 } 503 504#ifdef U_WCHAR_IS_UTF16 505 /* wchar_t is UTF-16 just do a memcpy */ 506 if(srcLength == -1){ 507 srcLength = u_strlen(src); 508 } 509 if(0 < srcLength && srcLength <= destCapacity){ 510 uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR); 511 } 512 if(pDestLength){ 513 *pDestLength = srcLength; 514 } 515 516 u_terminateUChars(dest,destCapacity,srcLength,pErrorCode); 517 518 return dest; 519 520#elif defined U_WCHAR_IS_UTF32 521 522 return u_strFromUTF32(dest, destCapacity, pDestLength, 523 (UChar32*)src, srcLength, pErrorCode); 524 525#else 526 527 return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode); 528 529#endif 530 531} 532 533#endif /* #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) && !UCONFIG_NO_CONVERSION */ 534