1/*
2******************************************************************************
3*
4*   Copyright (C) 1997-2011, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7******************************************************************************
8*
9* File CSTRING.C
10*
11* @author       Helena Shih
12*
13* Modification History:
14*
15*   Date        Name        Description
16*   6/18/98     hshih       Created
17*   09/08/98    stephen     Added include for ctype, for Mac Port
18*   11/15/99    helena      Integrated S/390 IEEE changes.
19******************************************************************************
20*/
21
22
23
24#include <stdlib.h>
25#include <stdio.h>
26#include "unicode/utypes.h"
27#include "cmemory.h"
28#include "cstring.h"
29#include "uassert.h"
30
31/*
32 * We hardcode case conversion for invariant characters to match our expectation
33 * and the compiler execution charset.
34 * This prevents problems on systems
35 * - with non-default casing behavior, like Turkish system locales where
36 *   tolower('I') maps to dotless i and toupper('i') maps to dotted I
37 * - where there are no lowercase Latin characters at all, or using different
38 *   codes (some old EBCDIC codepages)
39 *
40 * This works because the compiler usually runs on a platform where the execution
41 * charset includes all of the invariant characters at their expected
42 * code positions, so that the char * string literals in ICU code match
43 * the char literals here.
44 *
45 * Note that the set of lowercase Latin letters is discontiguous in EBCDIC
46 * and the set of uppercase Latin letters is discontiguous as well.
47 */
48
49U_CAPI UBool U_EXPORT2
50uprv_isASCIILetter(char c) {
51#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
52    return
53        ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') ||
54        ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z');
55#else
56    return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
57#endif
58}
59
60U_CAPI char U_EXPORT2
61uprv_toupper(char c) {
62#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
63    if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) {
64        c=(char)(c+('A'-'a'));
65    }
66#else
67    if('a'<=c && c<='z') {
68        c=(char)(c+('A'-'a'));
69    }
70#endif
71    return c;
72}
73
74
#if 0
/*
 * Disabled with #if 0 because cstring.h defines uprv_tolower() to be
 * the same as either uprv_asciitolower() or uprv_ebcdictolower()
 * to reduce the amount of code to cover with tests.
 *
 * Note that this uprv_tolower() definition is likely to work for most
 * charset families, not just ASCII and EBCDIC, because its #else branch
 * is written generically.
 *
 * Converts an uppercase Latin letter A-Z to its lowercase counterpart and
 * returns any other character unchanged.
 */
U_CAPI char U_EXPORT2
uprv_tolower(char c) {
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    /* EBCDIC uppercase letters live in three separate runs: A-I, J-R, S-Z. */
    if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) {
        c=(char)(c+('a'-'A'));
    }
#else
    /* Generic branch: relies on A-Z being one contiguous run. */
    if('A'<=c && c<='Z') {
        c=(char)(c+('a'-'A'));
    }
#endif
    return c;
}
#endif
99
100U_CAPI char U_EXPORT2
101uprv_asciitolower(char c) {
102    if(0x41<=c && c<=0x5a) {
103        c=(char)(c+0x20);
104    }
105    return c;
106}
107
108U_CAPI char U_EXPORT2
109uprv_ebcdictolower(char c) {
110    if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) ||
111        (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) ||
112        (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)
113    ) {
114        c=(char)(c-0x40);
115    }
116    return c;
117}
118
119
120U_CAPI char* U_EXPORT2
121T_CString_toLowerCase(char* str)
122{
123    char* origPtr = str;
124
125    if (str) {
126        do
127            *str = (char)uprv_tolower(*str);
128        while (*(str++));
129    }
130
131    return origPtr;
132}
133
134U_CAPI char* U_EXPORT2
135T_CString_toUpperCase(char* str)
136{
137    char* origPtr = str;
138
139    if (str) {
140        do
141            *str = (char)uprv_toupper(*str);
142        while (*(str++));
143    }
144
145    return origPtr;
146}
147
148/*
149 * Takes a int32_t and fills in  a char* string with that number "radix"-based.
150 * Does not handle negative values (makes an empty string for them).
151 * Writes at most 12 chars ("-2147483647" plus NUL).
152 * Returns the length of the string (not including the NUL).
153 */
154U_CAPI int32_t U_EXPORT2
155T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
156{
157    char      tbuf[30];
158    int32_t   tbx    = sizeof(tbuf);
159    uint8_t   digit;
160    int32_t   length = 0;
161    uint32_t  uval;
162
163    U_ASSERT(radix>=2 && radix<=16);
164    uval = (uint32_t) v;
165    if(v<0 && radix == 10) {
166        /* Only in base 10 do we conside numbers to be signed. */
167        uval = (uint32_t)(-v);
168        buffer[length++] = '-';
169    }
170
171    tbx = sizeof(tbuf)-1;
172    tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
173    do {
174        digit = (uint8_t)(uval % radix);
175        tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
176        uval  = uval / radix;
177    } while (uval != 0);
178
179    /* copy converted number into user buffer  */
180    uprv_strcpy(buffer+length, tbuf+tbx);
181    length += sizeof(tbuf) - tbx -1;
182    return length;
183}
184
185
186
187/*
188 * Takes a int64_t and fills in  a char* string with that number "radix"-based.
189 * Writes at most 21: chars ("-9223372036854775807" plus NUL).
190 * Returns the length of the string, not including the terminating NULL.
191 */
192U_CAPI int32_t U_EXPORT2
193T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
194{
195    char      tbuf[30];
196    int32_t   tbx    = sizeof(tbuf);
197    uint8_t   digit;
198    int32_t   length = 0;
199    uint64_t  uval;
200
201    U_ASSERT(radix>=2 && radix<=16);
202    uval = (uint64_t) v;
203    if(v<0 && radix == 10) {
204        /* Only in base 10 do we conside numbers to be signed. */
205        uval = (uint64_t)(-v);
206        buffer[length++] = '-';
207    }
208
209    tbx = sizeof(tbuf)-1;
210    tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
211    do {
212        digit = (uint8_t)(uval % radix);
213        tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
214        uval  = uval / radix;
215    } while (uval != 0);
216
217    /* copy converted number into user buffer  */
218    uprv_strcpy(buffer+length, tbuf+tbx);
219    length += sizeof(tbuf) - tbx -1;
220    return length;
221}
222
223
224U_CAPI int32_t U_EXPORT2
225T_CString_stringToInteger(const char *integerString, int32_t radix)
226{
227    char *end;
228    return uprv_strtoul(integerString, &end, radix);
229
230}
231
232U_CAPI int U_EXPORT2
233uprv_stricmp(const char *str1, const char *str2) {
234    if(str1==NULL) {
235        if(str2==NULL) {
236            return 0;
237        } else {
238            return -1;
239        }
240    } else if(str2==NULL) {
241        return 1;
242    } else {
243        /* compare non-NULL strings lexically with lowercase */
244        int rc;
245        unsigned char c1, c2;
246
247        for(;;) {
248            c1=(unsigned char)*str1;
249            c2=(unsigned char)*str2;
250            if(c1==0) {
251                if(c2==0) {
252                    return 0;
253                } else {
254                    return -1;
255                }
256            } else if(c2==0) {
257                return 1;
258            } else {
259                /* compare non-zero characters with lowercase */
260                rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
261                if(rc!=0) {
262                    return rc;
263                }
264            }
265            ++str1;
266            ++str2;
267        }
268    }
269}
270
271U_CAPI int U_EXPORT2
272uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
273    if(str1==NULL) {
274        if(str2==NULL) {
275            return 0;
276        } else {
277            return -1;
278        }
279    } else if(str2==NULL) {
280        return 1;
281    } else {
282        /* compare non-NULL strings lexically with lowercase */
283        int rc;
284        unsigned char c1, c2;
285
286        for(; n--;) {
287            c1=(unsigned char)*str1;
288            c2=(unsigned char)*str2;
289            if(c1==0) {
290                if(c2==0) {
291                    return 0;
292                } else {
293                    return -1;
294                }
295            } else if(c2==0) {
296                return 1;
297            } else {
298                /* compare non-zero characters with lowercase */
299                rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
300                if(rc!=0) {
301                    return rc;
302                }
303            }
304            ++str1;
305            ++str2;
306        }
307    }
308
309    return 0;
310}
311
312U_CAPI char* U_EXPORT2
313uprv_strdup(const char *src) {
314    size_t len = uprv_strlen(src) + 1;
315    char *dup = (char *) uprv_malloc(len);
316
317    if (dup) {
318        uprv_memcpy(dup, src, len);
319    }
320
321    return dup;
322}
323
324U_CAPI char* U_EXPORT2
325uprv_strndup(const char *src, int32_t n) {
326    char *dup;
327
328    if(n < 0) {
329        dup = uprv_strdup(src);
330    } else {
331        dup = (char*)uprv_malloc(n+1);
332        if (dup) {
333            uprv_memcpy(dup, src, n);
334            dup[n] = 0;
335        }
336    }
337
338    return dup;
339}
340