1/*
2*******************************************************************************
3*
4*   Copyright (C) 2001-2012, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  ustr_wcs.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2004sep07
14*   created by: Markus W. Scherer
15*
16*   u_strToWCS() and u_strFromWCS() functions
17*   moved here from ustrtrns.c for better modularization.
18*/
19
20#include "unicode/utypes.h"
21#include "unicode/ustring.h"
22#include "cstring.h"
23#include "cwchar.h"
24#include "cmemory.h"
25#include "ustr_imp.h"
26#include "ustr_cnv.h"
27
28#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
29
30#define _STACK_BUFFER_CAPACITY 1000
31#define _BUFFER_CAPACITY_MULTIPLIER 2
32
33#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
34// TODO: We should use CharString for char buffers and UnicodeString for UChar buffers.
35// Then we could change this to work only with wchar_t buffers.
36static inline UBool
37u_growAnyBufferFromStatic(void *context,
38                       void **pBuffer, int32_t *pCapacity, int32_t reqCapacity,
39                       int32_t length, int32_t size) {
40    // Use char* not void* to avoid the compiler's strict-aliasing assumptions
41    // and related warnings.
42    char *newBuffer=(char *)uprv_malloc(reqCapacity*size);
43    if(newBuffer!=NULL) {
44        if(length>0) {
45            uprv_memcpy(newBuffer, *pBuffer, length*size);
46        }
47        *pCapacity=reqCapacity;
48    } else {
49        *pCapacity=0;
50    }
51
52    /* release the old pBuffer if it was not statically allocated */
53    if(*pBuffer!=(char *)context) {
54        uprv_free(*pBuffer);
55    }
56
57    *pBuffer=newBuffer;
58    return (UBool)(newBuffer!=NULL);
59}
60
61/* helper function */
62static wchar_t*
63_strToWCS(wchar_t *dest,
64           int32_t destCapacity,
65           int32_t *pDestLength,
66           const UChar *src,
67           int32_t srcLength,
68           UErrorCode *pErrorCode){
69
70    char stackBuffer [_STACK_BUFFER_CAPACITY];
71    char* tempBuf = stackBuffer;
72    int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY;
73    char* tempBufLimit = stackBuffer + tempBufCapacity;
74    UConverter* conv = NULL;
75    char* saveBuf = tempBuf;
76    wchar_t* intTarget=NULL;
77    int32_t intTargetCapacity=0;
78    int count=0,retVal=0;
79
80    const UChar *pSrcLimit =NULL;
81    const UChar *pSrc = src;
82
83    conv = u_getDefaultConverter(pErrorCode);
84
85    if(U_FAILURE(*pErrorCode)){
86        return NULL;
87    }
88
89    if(srcLength == -1){
90        srcLength = u_strlen(pSrc);
91    }
92
93    pSrcLimit = pSrc + srcLength;
94
95    for(;;) {
96        /* reset the error state */
97        *pErrorCode = U_ZERO_ERROR;
98
99        /* convert to chars using default converter */
100        ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode);
101        count =(tempBuf - saveBuf);
102
103        /* This should rarely occur */
104        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
105            tempBuf = saveBuf;
106
107            /* we dont have enough room on the stack grow the buffer */
108            int32_t newCapacity = 2 * srcLength;
109            if(newCapacity <= tempBufCapacity) {
110                newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity;
111            }
112            if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
113                    newCapacity, count, 1)) {
114                goto cleanup;
115            }
116
117           saveBuf = tempBuf;
118           tempBufLimit = tempBuf + tempBufCapacity;
119           tempBuf = tempBuf + count;
120
121        } else {
122            break;
123        }
124    }
125
126    if(U_FAILURE(*pErrorCode)){
127        goto cleanup;
128    }
129
130    /* done with conversion null terminate the char buffer */
131    if(count>=tempBufCapacity){
132        tempBuf = saveBuf;
133        /* we dont have enough room on the stack grow the buffer */
134        if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
135                count+1, count, 1)) {
136            goto cleanup;
137        }
138       saveBuf = tempBuf;
139    }
140
141    saveBuf[count]=0;
142
143
144    /* allocate more space than required
145     * here we assume that every char requires
146     * no more than 2 wchar_ts
147     */
148    intTargetCapacity =  (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */;
149    intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) );
150
151    if(intTarget){
152
153        int32_t nulLen = 0;
154        int32_t remaining = intTargetCapacity;
155        wchar_t* pIntTarget=intTarget;
156        tempBuf = saveBuf;
157
158        /* now convert the mbs to wcs */
159        for(;;){
160
161            /* we can call the system API since we are sure that
162             * there is atleast 1 null in the input
163             */
164            retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining);
165
166            if(retVal==-1){
167                *pErrorCode = U_INVALID_CHAR_FOUND;
168                break;
169            }else if(retVal== remaining){/* should never occur */
170                int numWritten = (pIntTarget-intTarget);
171                u_growAnyBufferFromStatic(NULL,(void**) &intTarget,
172                                          &intTargetCapacity,
173                                          intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER,
174                                          numWritten,
175                                          sizeof(wchar_t));
176                pIntTarget = intTarget;
177                remaining=intTargetCapacity;
178
179                if(nulLen!=count){ /*there are embedded nulls*/
180                    pIntTarget+=numWritten;
181                    remaining-=numWritten;
182                }
183
184            }else{
185                int32_t nulVal;
186                /*scan for nulls */
187                /* we donot check for limit since tempBuf is null terminated */
188                while(tempBuf[nulLen++] != 0){
189                }
190                nulVal = (nulLen < srcLength) ? 1 : 0;
191                pIntTarget = pIntTarget + retVal+nulVal;
192                remaining -=(retVal+nulVal);
193
194                /* check if we have reached the source limit*/
195                if(nulLen>=(count)){
196                    break;
197                }
198            }
199        }
200        count = (int32_t)(pIntTarget-intTarget);
201
202        if(0 < count && count <= destCapacity){
203            uprv_memcpy(dest,intTarget,count*sizeof(wchar_t));
204        }
205
206        if(pDestLength){
207            *pDestLength = count;
208        }
209
210        /* free the allocated memory */
211        uprv_free(intTarget);
212
213    }else{
214        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
215    }
216cleanup:
217    /* are we still using stack buffer */
218    if(stackBuffer != saveBuf){
219        uprv_free(saveBuf);
220    }
221    u_terminateWChars(dest,destCapacity,count,pErrorCode);
222
223    u_releaseDefaultConverter(conv);
224
225    return dest;
226}
227#endif
228
229U_CAPI wchar_t* U_EXPORT2
230u_strToWCS(wchar_t *dest,
231           int32_t destCapacity,
232           int32_t *pDestLength,
233           const UChar *src,
234           int32_t srcLength,
235           UErrorCode *pErrorCode){
236
237    /* args check */
238    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
239        return NULL;
240    }
241
242    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
243        (destCapacity<0) || (dest == NULL && destCapacity > 0)
244    ) {
245        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
246        return NULL;
247    }
248
249#ifdef U_WCHAR_IS_UTF16
250    /* wchar_t is UTF-16 just do a memcpy */
251    if(srcLength == -1){
252        srcLength = u_strlen(src);
253    }
254    if(0 < srcLength && srcLength <= destCapacity){
255        uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR);
256    }
257    if(pDestLength){
258       *pDestLength = srcLength;
259    }
260
261    u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
262
263    return dest;
264
265#elif defined U_WCHAR_IS_UTF32
266
267    return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength,
268                                  src, srcLength, pErrorCode);
269
270#else
271
272    return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode);
273
274#endif
275
276}
277
278#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
279/* helper function */
280static UChar*
281_strFromWCS( UChar   *dest,
282             int32_t destCapacity,
283             int32_t *pDestLength,
284             const wchar_t *src,
285             int32_t srcLength,
286             UErrorCode *pErrorCode)
287{
288    int32_t retVal =0, count =0 ;
289    UConverter* conv = NULL;
290    UChar* pTarget = NULL;
291    UChar* pTargetLimit = NULL;
292    UChar* target = NULL;
293
294    UChar uStack [_STACK_BUFFER_CAPACITY];
295
296    wchar_t wStack[_STACK_BUFFER_CAPACITY];
297    wchar_t* pWStack = wStack;
298
299
300    char cStack[_STACK_BUFFER_CAPACITY];
301    int32_t cStackCap = _STACK_BUFFER_CAPACITY;
302    char* pCSrc=cStack;
303    char* pCSave=pCSrc;
304    char* pCSrcLimit=NULL;
305
306    const wchar_t* pSrc = src;
307    const wchar_t* pSrcLimit = NULL;
308
309    if(srcLength ==-1){
310        /* if the wchar_t source is null terminated we can safely
311         * assume that there are no embedded nulls, this is a fast
312         * path for null terminated strings.
313         */
314        for(;;){
315            /* convert wchars  to chars */
316            retVal = uprv_wcstombs(pCSrc,src, cStackCap);
317
318            if(retVal == -1){
319                *pErrorCode = U_ILLEGAL_CHAR_FOUND;
320                goto cleanup;
321            }else if(retVal >= (cStackCap-1)){
322                /* Should rarely occur */
323                u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
324                    cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char));
325                pCSave = pCSrc;
326            }else{
327                /* converted every thing */
328                pCSrc = pCSrc+retVal;
329                break;
330            }
331        }
332
333    }else{
334        /* here the source is not null terminated
335         * so it may have nulls embeded and we need to
336         * do some extra processing
337         */
338        int32_t remaining =cStackCap;
339
340        pSrcLimit = src + srcLength;
341
342        for(;;){
343            register int32_t nulLen = 0;
344
345            /* find nulls in the string */
346            while(nulLen<srcLength && pSrc[nulLen++]!=0){
347            }
348
349            if((pSrc+nulLen) < pSrcLimit){
350                /* check if we have enough room in pCSrc */
351                if(remaining < (nulLen * MB_CUR_MAX)){
352                    /* should rarely occur */
353                    int32_t len = (pCSrc-pCSave);
354                    pCSrc = pCSave;
355                    /* we do not have enough room so grow the buffer*/
356                    u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
357                           _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
358
359                    pCSave = pCSrc;
360                    pCSrc = pCSave+len;
361                    remaining = cStackCap-(pCSrc - pCSave);
362                }
363
364                /* we have found a null  so convert the
365                 * chunk from begining of non-null char to null
366                 */
367                retVal = uprv_wcstombs(pCSrc,pSrc,remaining);
368
369                if(retVal==-1){
370                    /* an error occurred bail out */
371                    *pErrorCode = U_ILLEGAL_CHAR_FOUND;
372                    goto cleanup;
373                }
374
375                pCSrc += retVal+1 /* already null terminated */;
376
377                pSrc += nulLen; /* skip past the null */
378                srcLength-=nulLen; /* decrement the srcLength */
379                remaining -= (pCSrc-pCSave);
380
381
382            }else{
383                /* the source is not null terminated and we are
384                 * end of source so we copy the source to a temp buffer
385                 * null terminate it and convert wchar_ts to chars
386                 */
387                if(nulLen >= _STACK_BUFFER_CAPACITY){
388                    /* Should rarely occcur */
389                    /* allocate new buffer buffer */
390                    pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1));
391                    if(pWStack==NULL){
392                        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
393                        goto cleanup;
394                    }
395                }
396                if(nulLen>0){
397                    /* copy the contents to tempStack */
398                    uprv_memcpy(pWStack,pSrc,nulLen*sizeof(wchar_t));
399                }
400
401                /* null terminate the tempBuffer */
402                pWStack[nulLen] =0 ;
403
404                if(remaining < (nulLen * MB_CUR_MAX)){
405                    /* Should rarely occur */
406                    int32_t len = (pCSrc-pCSave);
407                    pCSrc = pCSave;
408                    /* we do not have enough room so grow the buffer*/
409                    u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
410                           cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
411
412                    pCSave = pCSrc;
413                    pCSrc = pCSave+len;
414                    remaining = cStackCap-(pCSrc - pCSave);
415                }
416                /* convert to chars */
417                retVal = uprv_wcstombs(pCSrc,pWStack,remaining);
418
419                pCSrc += retVal;
420                pSrc  += nulLen;
421                srcLength-=nulLen; /* decrement the srcLength */
422                break;
423            }
424        }
425    }
426
427    /* OK..now we have converted from wchar_ts to chars now
428     * convert chars to UChars
429     */
430    pCSrcLimit = pCSrc;
431    pCSrc = pCSave;
432    pTarget = target= dest;
433    pTargetLimit = dest + destCapacity;
434
435    conv= u_getDefaultConverter(pErrorCode);
436
437    if(U_FAILURE(*pErrorCode)|| conv==NULL){
438        goto cleanup;
439    }
440
441    for(;;) {
442
443        *pErrorCode = U_ZERO_ERROR;
444
445        /* convert to stack buffer*/
446        ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode);
447
448        /* increment count to number written to stack */
449        count+= pTarget - target;
450
451        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
452            target = uStack;
453            pTarget = uStack;
454            pTargetLimit = uStack + _STACK_BUFFER_CAPACITY;
455        } else {
456            break;
457        }
458
459    }
460
461    if(pDestLength){
462        *pDestLength =count;
463    }
464
465    u_terminateUChars(dest,destCapacity,count,pErrorCode);
466
467cleanup:
468
469    if(cStack != pCSave){
470        uprv_free(pCSave);
471    }
472
473    if(wStack != pWStack){
474        uprv_free(pWStack);
475    }
476
477    u_releaseDefaultConverter(conv);
478
479    return dest;
480}
481#endif
482
483U_CAPI UChar* U_EXPORT2
484u_strFromWCS(UChar   *dest,
485             int32_t destCapacity,
486             int32_t *pDestLength,
487             const wchar_t *src,
488             int32_t srcLength,
489             UErrorCode *pErrorCode)
490{
491
492    /* args check */
493    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
494        return NULL;
495    }
496
497    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
498        (destCapacity<0) || (dest == NULL && destCapacity > 0)
499    ) {
500        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
501        return NULL;
502    }
503
504#ifdef U_WCHAR_IS_UTF16
505    /* wchar_t is UTF-16 just do a memcpy */
506    if(srcLength == -1){
507        srcLength = u_strlen(src);
508    }
509    if(0 < srcLength && srcLength <= destCapacity){
510        uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR);
511    }
512    if(pDestLength){
513       *pDestLength = srcLength;
514    }
515
516    u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
517
518    return dest;
519
520#elif defined U_WCHAR_IS_UTF32
521
522    return u_strFromUTF32(dest, destCapacity, pDestLength,
523                          (UChar32*)src, srcLength, pErrorCode);
524
525#else
526
527    return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);
528
529#endif
530
531}
532
533#endif /* #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) && !UCONFIG_NO_CONVERSION */
534