1/*
2*******************************************************************************
3*
4*   Copyright (C) 2001-2010, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  ustr_wcs.c
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2004sep07
14*   created by: Markus W. Scherer
15*
16*   u_strToWCS() and u_strFromWCS() functions
17*   moved here from ustrtrns.c for better modularization.
18*/
19
20#include "unicode/utypes.h"
21#include "unicode/ustring.h"
22#include "cstring.h"
23#include "cwchar.h"
24#include "cmemory.h"
25#include "ustr_imp.h"
26#include "ustr_cnv.h"
27
28#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
29
30#define _STACK_BUFFER_CAPACITY 1000
31#define _BUFFER_CAPACITY_MULTIPLIER 2
32
33#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
34static U_INLINE UBool
35u_growAnyBufferFromStatic(void *context,
36                       void **pBuffer, int32_t *pCapacity, int32_t reqCapacity,
37                       int32_t length, int32_t size) {
38
39    void *newBuffer=uprv_malloc(reqCapacity*size);
40    if(newBuffer!=NULL) {
41        if(length>0) {
42            uprv_memcpy(newBuffer, *pBuffer, length*size);
43        }
44        *pCapacity=reqCapacity;
45    } else {
46        *pCapacity=0;
47    }
48
49    /* release the old pBuffer if it was not statically allocated */
50    if(*pBuffer!=(void *)context) {
51        uprv_free(*pBuffer);
52    }
53
54    *pBuffer=newBuffer;
55    return (UBool)(newBuffer!=NULL);
56}
57
58/* helper function */
59static wchar_t*
60_strToWCS(wchar_t *dest,
61           int32_t destCapacity,
62           int32_t *pDestLength,
63           const UChar *src,
64           int32_t srcLength,
65           UErrorCode *pErrorCode){
66
67    char stackBuffer [_STACK_BUFFER_CAPACITY];
68    char* tempBuf = stackBuffer;
69    int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY;
70    char* tempBufLimit = stackBuffer + tempBufCapacity;
71    UConverter* conv = NULL;
72    char* saveBuf = tempBuf;
73    wchar_t* intTarget=NULL;
74    int32_t intTargetCapacity=0;
75    int count=0,retVal=0;
76
77    const UChar *pSrcLimit =NULL;
78    const UChar *pSrc = src;
79
80    conv = u_getDefaultConverter(pErrorCode);
81
82    if(U_FAILURE(*pErrorCode)){
83        return NULL;
84    }
85
86    if(srcLength == -1){
87        srcLength = u_strlen(pSrc);
88    }
89
90    pSrcLimit = pSrc + srcLength;
91
92    for(;;) {
93        /* reset the error state */
94        *pErrorCode = U_ZERO_ERROR;
95
96        /* convert to chars using default converter */
97        ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode);
98        count =(tempBuf - saveBuf);
99
100        /* This should rarely occur */
101        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
102            tempBuf = saveBuf;
103
104            /* we dont have enough room on the stack grow the buffer */
105            if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
106                (_BUFFER_CAPACITY_MULTIPLIER * (srcLength)), count,sizeof(char))){
107                goto cleanup;
108            }
109
110           saveBuf = tempBuf;
111           tempBufLimit = tempBuf + tempBufCapacity;
112           tempBuf = tempBuf + count;
113
114        } else {
115            break;
116        }
117    }
118
119    if(U_FAILURE(*pErrorCode)){
120        goto cleanup;
121    }
122
123    /* done with conversion null terminate the char buffer */
124    if(count>=tempBufCapacity){
125        tempBuf = saveBuf;
126        /* we dont have enough room on the stack grow the buffer */
127        if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
128            tempBufCapacity-count+1, count,sizeof(char))){
129            goto cleanup;
130        }
131       saveBuf = tempBuf;
132    }
133
134    saveBuf[count]=0;
135
136
137    /* allocate more space than required
138     * here we assume that every char requires
139     * no more than 2 wchar_ts
140     */
141    intTargetCapacity =  (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */;
142    intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) );
143
144    if(intTarget){
145
146        int32_t nulLen = 0;
147        int32_t remaining = intTargetCapacity;
148        wchar_t* pIntTarget=intTarget;
149        tempBuf = saveBuf;
150
151        /* now convert the mbs to wcs */
152        for(;;){
153
154            /* we can call the system API since we are sure that
155             * there is atleast 1 null in the input
156             */
157            retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining);
158
159            if(retVal==-1){
160                *pErrorCode = U_INVALID_CHAR_FOUND;
161                break;
162            }else if(retVal== remaining){/* should never occur */
163                int numWritten = (pIntTarget-intTarget);
164                u_growAnyBufferFromStatic(NULL,(void**) &intTarget,
165                                          &intTargetCapacity,
166                                          intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER,
167                                          numWritten,
168                                          sizeof(wchar_t));
169                pIntTarget = intTarget;
170                remaining=intTargetCapacity;
171
172                if(nulLen!=count){ /*there are embedded nulls*/
173                    pIntTarget+=numWritten;
174                    remaining-=numWritten;
175                }
176
177            }else{
178                int32_t nulVal;
179                /*scan for nulls */
180                /* we donot check for limit since tempBuf is null terminated */
181                while(tempBuf[nulLen++] != 0){
182                }
183                nulVal = (nulLen < srcLength) ? 1 : 0;
184                pIntTarget = pIntTarget + retVal+nulVal;
185                remaining -=(retVal+nulVal);
186
187                /* check if we have reached the source limit*/
188                if(nulLen>=(count)){
189                    break;
190                }
191            }
192        }
193        count = (int32_t)(pIntTarget-intTarget);
194
195        if(0 < count && count <= destCapacity){
196            uprv_memcpy(dest,intTarget,count*sizeof(wchar_t));
197        }
198
199        if(pDestLength){
200            *pDestLength = count;
201        }
202
203        /* free the allocated memory */
204        uprv_free(intTarget);
205
206    }else{
207        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
208    }
209cleanup:
210    /* are we still using stack buffer */
211    if(stackBuffer != saveBuf){
212        uprv_free(saveBuf);
213    }
214    u_terminateWChars(dest,destCapacity,count,pErrorCode);
215
216    u_releaseDefaultConverter(conv);
217
218    return dest;
219}
220#endif
221
222U_CAPI wchar_t* U_EXPORT2
223u_strToWCS(wchar_t *dest,
224           int32_t destCapacity,
225           int32_t *pDestLength,
226           const UChar *src,
227           int32_t srcLength,
228           UErrorCode *pErrorCode){
229
230    /* args check */
231    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
232        return NULL;
233    }
234
235    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
236        (destCapacity<0) || (dest == NULL && destCapacity > 0)
237    ) {
238        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
239        return NULL;
240    }
241
242#ifdef U_WCHAR_IS_UTF16
243    /* wchar_t is UTF-16 just do a memcpy */
244    if(srcLength == -1){
245        srcLength = u_strlen(src);
246    }
247    if(0 < srcLength && srcLength <= destCapacity){
248        uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR);
249    }
250    if(pDestLength){
251       *pDestLength = srcLength;
252    }
253
254    u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
255
256    return dest;
257
258#elif defined U_WCHAR_IS_UTF32
259
260    return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength,
261                                  src, srcLength, pErrorCode);
262
263#else
264
265    return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode);
266
267#endif
268
269}
270
271#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
272/* helper function */
273static UChar*
274_strFromWCS( UChar   *dest,
275             int32_t destCapacity,
276             int32_t *pDestLength,
277             const wchar_t *src,
278             int32_t srcLength,
279             UErrorCode *pErrorCode)
280{
281    int32_t retVal =0, count =0 ;
282    UConverter* conv = NULL;
283    UChar* pTarget = NULL;
284    UChar* pTargetLimit = NULL;
285    UChar* target = NULL;
286
287    UChar uStack [_STACK_BUFFER_CAPACITY];
288
289    wchar_t wStack[_STACK_BUFFER_CAPACITY];
290    wchar_t* pWStack = wStack;
291
292
293    char cStack[_STACK_BUFFER_CAPACITY];
294    int32_t cStackCap = _STACK_BUFFER_CAPACITY;
295    char* pCSrc=cStack;
296    char* pCSave=pCSrc;
297    char* pCSrcLimit=NULL;
298
299    const wchar_t* pSrc = src;
300    const wchar_t* pSrcLimit = NULL;
301
302    if(srcLength ==-1){
303        /* if the wchar_t source is null terminated we can safely
304         * assume that there are no embedded nulls, this is a fast
305         * path for null terminated strings.
306         */
307        for(;;){
308            /* convert wchars  to chars */
309            retVal = uprv_wcstombs(pCSrc,src, cStackCap);
310
311            if(retVal == -1){
312                *pErrorCode = U_ILLEGAL_CHAR_FOUND;
313                goto cleanup;
314            }else if(retVal >= (cStackCap-1)){
315                /* Should rarely occur */
316                u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
317                    cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char));
318                pCSave = pCSrc;
319            }else{
320                /* converted every thing */
321                pCSrc = pCSrc+retVal;
322                break;
323            }
324        }
325
326    }else{
327        /* here the source is not null terminated
328         * so it may have nulls embeded and we need to
329         * do some extra processing
330         */
331        int32_t remaining =cStackCap;
332
333        pSrcLimit = src + srcLength;
334
335        for(;;){
336            register int32_t nulLen = 0;
337
338            /* find nulls in the string */
339            while(nulLen<srcLength && pSrc[nulLen++]!=0){
340            }
341
342            if((pSrc+nulLen) < pSrcLimit){
343                /* check if we have enough room in pCSrc */
344                if(remaining < (nulLen * MB_CUR_MAX)){
345                    /* should rarely occur */
346                    int32_t len = (pCSrc-pCSave);
347                    pCSrc = pCSave;
348                    /* we do not have enough room so grow the buffer*/
349                    u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
350                           _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
351
352                    pCSave = pCSrc;
353                    pCSrc = pCSave+len;
354                    remaining = cStackCap-(pCSrc - pCSave);
355                }
356
357                /* we have found a null  so convert the
358                 * chunk from begining of non-null char to null
359                 */
360                retVal = uprv_wcstombs(pCSrc,pSrc,remaining);
361
362                if(retVal==-1){
363                    /* an error occurred bail out */
364                    *pErrorCode = U_ILLEGAL_CHAR_FOUND;
365                    goto cleanup;
366                }
367
368                pCSrc += retVal+1 /* already null terminated */;
369
370                pSrc += nulLen; /* skip past the null */
371                srcLength-=nulLen; /* decrement the srcLength */
372                remaining -= (pCSrc-pCSave);
373
374
375            }else{
376                /* the source is not null terminated and we are
377                 * end of source so we copy the source to a temp buffer
378                 * null terminate it and convert wchar_ts to chars
379                 */
380                if(nulLen >= _STACK_BUFFER_CAPACITY){
381                    /* Should rarely occcur */
382                    /* allocate new buffer buffer */
383                    pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1));
384                    if(pWStack==NULL){
385                        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
386                        goto cleanup;
387                    }
388                }
389                if(nulLen>0){
390                    /* copy the contents to tempStack */
391                    uprv_memcpy(pWStack,pSrc,nulLen*sizeof(wchar_t));
392                }
393
394                /* null terminate the tempBuffer */
395                pWStack[nulLen] =0 ;
396
397                if(remaining < (nulLen * MB_CUR_MAX)){
398                    /* Should rarely occur */
399                    int32_t len = (pCSrc-pCSave);
400                    pCSrc = pCSave;
401                    /* we do not have enough room so grow the buffer*/
402                    u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
403                           cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
404
405                    pCSave = pCSrc;
406                    pCSrc = pCSave+len;
407                    remaining = cStackCap-(pCSrc - pCSave);
408                }
409                /* convert to chars */
410                retVal = uprv_wcstombs(pCSrc,pWStack,remaining);
411
412                pCSrc += retVal;
413                pSrc  += nulLen;
414                srcLength-=nulLen; /* decrement the srcLength */
415                break;
416            }
417        }
418    }
419
420    /* OK..now we have converted from wchar_ts to chars now
421     * convert chars to UChars
422     */
423    pCSrcLimit = pCSrc;
424    pCSrc = pCSave;
425    pTarget = target= dest;
426    pTargetLimit = dest + destCapacity;
427
428    conv= u_getDefaultConverter(pErrorCode);
429
430    if(U_FAILURE(*pErrorCode)|| conv==NULL){
431        goto cleanup;
432    }
433
434    for(;;) {
435
436        *pErrorCode = U_ZERO_ERROR;
437
438        /* convert to stack buffer*/
439        ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode);
440
441        /* increment count to number written to stack */
442        count+= pTarget - target;
443
444        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
445            target = uStack;
446            pTarget = uStack;
447            pTargetLimit = uStack + _STACK_BUFFER_CAPACITY;
448        } else {
449            break;
450        }
451
452    }
453
454    if(pDestLength){
455        *pDestLength =count;
456    }
457
458    u_terminateUChars(dest,destCapacity,count,pErrorCode);
459
460cleanup:
461
462    if(cStack != pCSave){
463        uprv_free(pCSave);
464    }
465
466    if(wStack != pWStack){
467        uprv_free(pWStack);
468    }
469
470    u_releaseDefaultConverter(conv);
471
472    return dest;
473}
474#endif
475
476U_CAPI UChar* U_EXPORT2
477u_strFromWCS(UChar   *dest,
478             int32_t destCapacity,
479             int32_t *pDestLength,
480             const wchar_t *src,
481             int32_t srcLength,
482             UErrorCode *pErrorCode)
483{
484
485    /* args check */
486    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
487        return NULL;
488    }
489
490    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
491        (destCapacity<0) || (dest == NULL && destCapacity > 0)
492    ) {
493        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
494        return NULL;
495    }
496
497#ifdef U_WCHAR_IS_UTF16
498    /* wchar_t is UTF-16 just do a memcpy */
499    if(srcLength == -1){
500        srcLength = u_strlen(src);
501    }
502    if(0 < srcLength && srcLength <= destCapacity){
503        uprv_memcpy(dest,src,srcLength*U_SIZEOF_UCHAR);
504    }
505    if(pDestLength){
506       *pDestLength = srcLength;
507    }
508
509    u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
510
511    return dest;
512
513#elif defined U_WCHAR_IS_UTF32
514
515    return u_strFromUTF32(dest, destCapacity, pDestLength,
516                          (UChar32*)src, srcLength, pErrorCode);
517
518#else
519
520    return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);
521
522#endif
523
524}
525
#endif /* #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
527