1// Copyright (C) 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6*   Copyright (C) 2001-2012, International Business Machines
7*   Corporation and others.  All Rights Reserved.
8*
9*******************************************************************************
10*   file name:  ustr_wcs.cpp
11*   encoding:   US-ASCII
12*   tab size:   8 (not used)
13*   indentation:4
14*
15*   created on: 2004sep07
16*   created by: Markus W. Scherer
17*
18*   u_strToWCS() and u_strFromWCS() functions
19*   moved here from ustrtrns.c for better modularization.
20*/
21
22#include "unicode/utypes.h"
23#include "unicode/ustring.h"
24#include "cstring.h"
25#include "cwchar.h"
26#include "cmemory.h"
27#include "ustr_imp.h"
28#include "ustr_cnv.h"
29
30#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
31
32#define _STACK_BUFFER_CAPACITY 1000
33#define _BUFFER_CAPACITY_MULTIPLIER 2
34
35#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
36// TODO: We should use CharString for char buffers and UnicodeString for UChar buffers.
37// Then we could change this to work only with wchar_t buffers.
38static inline UBool
39u_growAnyBufferFromStatic(void *context,
40                       void **pBuffer, int32_t *pCapacity, int32_t reqCapacity,
41                       int32_t length, int32_t size) {
42    // Use char* not void* to avoid the compiler's strict-aliasing assumptions
43    // and related warnings.
44    char *newBuffer=(char *)uprv_malloc(reqCapacity*size);
45    if(newBuffer!=NULL) {
46        if(length>0) {
47            uprv_memcpy(newBuffer, *pBuffer, (size_t)length*size);
48        }
49        *pCapacity=reqCapacity;
50    } else {
51        *pCapacity=0;
52    }
53
54    /* release the old pBuffer if it was not statically allocated */
55    if(*pBuffer!=(char *)context) {
56        uprv_free(*pBuffer);
57    }
58
59    *pBuffer=newBuffer;
60    return (UBool)(newBuffer!=NULL);
61}
62
63/* helper function */
64static wchar_t*
65_strToWCS(wchar_t *dest,
66           int32_t destCapacity,
67           int32_t *pDestLength,
68           const UChar *src,
69           int32_t srcLength,
70           UErrorCode *pErrorCode){
71
72    char stackBuffer [_STACK_BUFFER_CAPACITY];
73    char* tempBuf = stackBuffer;
74    int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY;
75    char* tempBufLimit = stackBuffer + tempBufCapacity;
76    UConverter* conv = NULL;
77    char* saveBuf = tempBuf;
78    wchar_t* intTarget=NULL;
79    int32_t intTargetCapacity=0;
80    int count=0,retVal=0;
81
82    const UChar *pSrcLimit =NULL;
83    const UChar *pSrc = src;
84
85    conv = u_getDefaultConverter(pErrorCode);
86
87    if(U_FAILURE(*pErrorCode)){
88        return NULL;
89    }
90
91    if(srcLength == -1){
92        srcLength = u_strlen(pSrc);
93    }
94
95    pSrcLimit = pSrc + srcLength;
96
97    for(;;) {
98        /* reset the error state */
99        *pErrorCode = U_ZERO_ERROR;
100
101        /* convert to chars using default converter */
102        ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode);
103        count =(tempBuf - saveBuf);
104
105        /* This should rarely occur */
106        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
107            tempBuf = saveBuf;
108
109            /* we dont have enough room on the stack grow the buffer */
110            int32_t newCapacity = 2 * srcLength;
111            if(newCapacity <= tempBufCapacity) {
112                newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity;
113            }
114            if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
115                    newCapacity, count, 1)) {
116                goto cleanup;
117            }
118
119           saveBuf = tempBuf;
120           tempBufLimit = tempBuf + tempBufCapacity;
121           tempBuf = tempBuf + count;
122
123        } else {
124            break;
125        }
126    }
127
128    if(U_FAILURE(*pErrorCode)){
129        goto cleanup;
130    }
131
132    /* done with conversion null terminate the char buffer */
133    if(count>=tempBufCapacity){
134        tempBuf = saveBuf;
135        /* we dont have enough room on the stack grow the buffer */
136        if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
137                count+1, count, 1)) {
138            goto cleanup;
139        }
140       saveBuf = tempBuf;
141    }
142
143    saveBuf[count]=0;
144
145
146    /* allocate more space than required
147     * here we assume that every char requires
148     * no more than 2 wchar_ts
149     */
150    intTargetCapacity =  (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */;
151    intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) );
152
153    if(intTarget){
154
155        int32_t nulLen = 0;
156        int32_t remaining = intTargetCapacity;
157        wchar_t* pIntTarget=intTarget;
158        tempBuf = saveBuf;
159
160        /* now convert the mbs to wcs */
161        for(;;){
162
163            /* we can call the system API since we are sure that
164             * there is atleast 1 null in the input
165             */
166            retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining);
167
168            if(retVal==-1){
169                *pErrorCode = U_INVALID_CHAR_FOUND;
170                break;
171            }else if(retVal== remaining){/* should never occur */
172                int numWritten = (pIntTarget-intTarget);
173                u_growAnyBufferFromStatic(NULL,(void**) &intTarget,
174                                          &intTargetCapacity,
175                                          intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER,
176                                          numWritten,
177                                          sizeof(wchar_t));
178                pIntTarget = intTarget;
179                remaining=intTargetCapacity;
180
181                if(nulLen!=count){ /*there are embedded nulls*/
182                    pIntTarget+=numWritten;
183                    remaining-=numWritten;
184                }
185
186            }else{
187                int32_t nulVal;
188                /*scan for nulls */
189                /* we donot check for limit since tempBuf is null terminated */
190                while(tempBuf[nulLen++] != 0){
191                }
192                nulVal = (nulLen < srcLength) ? 1 : 0;
193                pIntTarget = pIntTarget + retVal+nulVal;
194                remaining -=(retVal+nulVal);
195
196                /* check if we have reached the source limit*/
197                if(nulLen>=(count)){
198                    break;
199                }
200            }
201        }
202        count = (int32_t)(pIntTarget-intTarget);
203
204        if(0 < count && count <= destCapacity){
205            uprv_memcpy(dest, intTarget, (size_t)count*sizeof(wchar_t));
206        }
207
208        if(pDestLength){
209            *pDestLength = count;
210        }
211
212        /* free the allocated memory */
213        uprv_free(intTarget);
214
215    }else{
216        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
217    }
218cleanup:
219    /* are we still using stack buffer */
220    if(stackBuffer != saveBuf){
221        uprv_free(saveBuf);
222    }
223    u_terminateWChars(dest,destCapacity,count,pErrorCode);
224
225    u_releaseDefaultConverter(conv);
226
227    return dest;
228}
229#endif
230
231U_CAPI wchar_t* U_EXPORT2
232u_strToWCS(wchar_t *dest,
233           int32_t destCapacity,
234           int32_t *pDestLength,
235           const UChar *src,
236           int32_t srcLength,
237           UErrorCode *pErrorCode){
238
239    /* args check */
240    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
241        return NULL;
242    }
243
244    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
245        (destCapacity<0) || (dest == NULL && destCapacity > 0)
246    ) {
247        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
248        return NULL;
249    }
250
251#ifdef U_WCHAR_IS_UTF16
252    /* wchar_t is UTF-16 just do a memcpy */
253    if(srcLength == -1){
254        srcLength = u_strlen(src);
255    }
256    if(0 < srcLength && srcLength <= destCapacity){
257        u_memcpy(dest, src, srcLength);
258    }
259    if(pDestLength){
260       *pDestLength = srcLength;
261    }
262
263    u_terminateUChars((UChar *)dest,destCapacity,srcLength,pErrorCode);
264
265    return dest;
266
267#elif defined U_WCHAR_IS_UTF32
268
269    return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength,
270                                  src, srcLength, pErrorCode);
271
272#else
273
274    return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode);
275
276#endif
277
278}
279
280#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
281/* helper function */
282static UChar*
283_strFromWCS( UChar   *dest,
284             int32_t destCapacity,
285             int32_t *pDestLength,
286             const wchar_t *src,
287             int32_t srcLength,
288             UErrorCode *pErrorCode)
289{
290    int32_t retVal =0, count =0 ;
291    UConverter* conv = NULL;
292    UChar* pTarget = NULL;
293    UChar* pTargetLimit = NULL;
294    UChar* target = NULL;
295
296    UChar uStack [_STACK_BUFFER_CAPACITY];
297
298    wchar_t wStack[_STACK_BUFFER_CAPACITY];
299    wchar_t* pWStack = wStack;
300
301
302    char cStack[_STACK_BUFFER_CAPACITY];
303    int32_t cStackCap = _STACK_BUFFER_CAPACITY;
304    char* pCSrc=cStack;
305    char* pCSave=pCSrc;
306    char* pCSrcLimit=NULL;
307
308    const wchar_t* pSrc = src;
309    const wchar_t* pSrcLimit = NULL;
310
311    if(srcLength ==-1){
312        /* if the wchar_t source is null terminated we can safely
313         * assume that there are no embedded nulls, this is a fast
314         * path for null terminated strings.
315         */
316        for(;;){
317            /* convert wchars  to chars */
318            retVal = uprv_wcstombs(pCSrc,src, cStackCap);
319
320            if(retVal == -1){
321                *pErrorCode = U_ILLEGAL_CHAR_FOUND;
322                goto cleanup;
323            }else if(retVal >= (cStackCap-1)){
324                /* Should rarely occur */
325                u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
326                    cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char));
327                pCSave = pCSrc;
328            }else{
329                /* converted every thing */
330                pCSrc = pCSrc+retVal;
331                break;
332            }
333        }
334
335    }else{
336        /* here the source is not null terminated
337         * so it may have nulls embeded and we need to
338         * do some extra processing
339         */
340        int32_t remaining =cStackCap;
341
342        pSrcLimit = src + srcLength;
343
344        for(;;){
345            register int32_t nulLen = 0;
346
347            /* find nulls in the string */
348            while(nulLen<srcLength && pSrc[nulLen++]!=0){
349            }
350
351            if((pSrc+nulLen) < pSrcLimit){
352                /* check if we have enough room in pCSrc */
353                if(remaining < (nulLen * MB_CUR_MAX)){
354                    /* should rarely occur */
355                    int32_t len = (pCSrc-pCSave);
356                    pCSrc = pCSave;
357                    /* we do not have enough room so grow the buffer*/
358                    u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
359                           _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
360
361                    pCSave = pCSrc;
362                    pCSrc = pCSave+len;
363                    remaining = cStackCap-(pCSrc - pCSave);
364                }
365
366                /* we have found a null  so convert the
367                 * chunk from begining of non-null char to null
368                 */
369                retVal = uprv_wcstombs(pCSrc,pSrc,remaining);
370
371                if(retVal==-1){
372                    /* an error occurred bail out */
373                    *pErrorCode = U_ILLEGAL_CHAR_FOUND;
374                    goto cleanup;
375                }
376
377                pCSrc += retVal+1 /* already null terminated */;
378
379                pSrc += nulLen; /* skip past the null */
380                srcLength-=nulLen; /* decrement the srcLength */
381                remaining -= (pCSrc-pCSave);
382
383
384            }else{
385                /* the source is not null terminated and we are
386                 * end of source so we copy the source to a temp buffer
387                 * null terminate it and convert wchar_ts to chars
388                 */
389                if(nulLen >= _STACK_BUFFER_CAPACITY){
390                    /* Should rarely occcur */
391                    /* allocate new buffer buffer */
392                    pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1));
393                    if(pWStack==NULL){
394                        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
395                        goto cleanup;
396                    }
397                }
398                if(nulLen>0){
399                    /* copy the contents to tempStack */
400                    uprv_memcpy(pWStack, pSrc, (size_t)nulLen*sizeof(wchar_t));
401                }
402
403                /* null terminate the tempBuffer */
404                pWStack[nulLen] =0 ;
405
406                if(remaining < (nulLen * MB_CUR_MAX)){
407                    /* Should rarely occur */
408                    int32_t len = (pCSrc-pCSave);
409                    pCSrc = pCSave;
410                    /* we do not have enough room so grow the buffer*/
411                    u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
412                           cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
413
414                    pCSave = pCSrc;
415                    pCSrc = pCSave+len;
416                    remaining = cStackCap-(pCSrc - pCSave);
417                }
418                /* convert to chars */
419                retVal = uprv_wcstombs(pCSrc,pWStack,remaining);
420
421                pCSrc += retVal;
422                pSrc  += nulLen;
423                srcLength-=nulLen; /* decrement the srcLength */
424                break;
425            }
426        }
427    }
428
429    /* OK..now we have converted from wchar_ts to chars now
430     * convert chars to UChars
431     */
432    pCSrcLimit = pCSrc;
433    pCSrc = pCSave;
434    pTarget = target= dest;
435    pTargetLimit = dest + destCapacity;
436
437    conv= u_getDefaultConverter(pErrorCode);
438
439    if(U_FAILURE(*pErrorCode)|| conv==NULL){
440        goto cleanup;
441    }
442
443    for(;;) {
444
445        *pErrorCode = U_ZERO_ERROR;
446
447        /* convert to stack buffer*/
448        ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode);
449
450        /* increment count to number written to stack */
451        count+= pTarget - target;
452
453        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
454            target = uStack;
455            pTarget = uStack;
456            pTargetLimit = uStack + _STACK_BUFFER_CAPACITY;
457        } else {
458            break;
459        }
460
461    }
462
463    if(pDestLength){
464        *pDestLength =count;
465    }
466
467    u_terminateUChars(dest,destCapacity,count,pErrorCode);
468
469cleanup:
470
471    if(cStack != pCSave){
472        uprv_free(pCSave);
473    }
474
475    if(wStack != pWStack){
476        uprv_free(pWStack);
477    }
478
479    u_releaseDefaultConverter(conv);
480
481    return dest;
482}
483#endif
484
485U_CAPI UChar* U_EXPORT2
486u_strFromWCS(UChar   *dest,
487             int32_t destCapacity,
488             int32_t *pDestLength,
489             const wchar_t *src,
490             int32_t srcLength,
491             UErrorCode *pErrorCode)
492{
493
494    /* args check */
495    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
496        return NULL;
497    }
498
499    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
500        (destCapacity<0) || (dest == NULL && destCapacity > 0)
501    ) {
502        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
503        return NULL;
504    }
505
506#ifdef U_WCHAR_IS_UTF16
507    /* wchar_t is UTF-16 just do a memcpy */
508    if(srcLength == -1){
509        srcLength = u_strlen((const UChar *)src);
510    }
511    if(0 < srcLength && srcLength <= destCapacity){
512        u_memcpy(dest, src, srcLength);
513    }
514    if(pDestLength){
515       *pDestLength = srcLength;
516    }
517
518    u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
519
520    return dest;
521
522#elif defined U_WCHAR_IS_UTF32
523
524    return u_strFromUTF32(dest, destCapacity, pDestLength,
525                          (UChar32*)src, srcLength, pErrorCode);
526
527#else
528
529    return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);
530
531#endif
532
533}
534
535#endif /* #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) && !UCONFIG_NO_CONVERSION */
536