cstring.c revision c73f511526464f8e56c242df80552e9b0d94ae3d
1/* 2****************************************************************************** 3* 4* Copyright (C) 1997-2011, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7****************************************************************************** 8* 9* File CSTRING.C 10* 11* @author Helena Shih 12* 13* Modification History: 14* 15* Date Name Description 16* 6/18/98 hshih Created 17* 09/08/98 stephen Added include for ctype, for Mac Port 18* 11/15/99 helena Integrated S/390 IEEE changes. 19****************************************************************************** 20*/ 21 22 23 24#include <stdlib.h> 25#include <stdio.h> 26#include "unicode/utypes.h" 27#include "cmemory.h" 28#include "cstring.h" 29#include "uassert.h" 30 31/* 32 * We hardcode case conversion for invariant characters to match our expectation 33 * and the compiler execution charset. 34 * This prevents problems on systems 35 * - with non-default casing behavior, like Turkish system locales where 36 * tolower('I') maps to dotless i and toupper('i') maps to dotted I 37 * - where there are no lowercase Latin characters at all, or using different 38 * codes (some old EBCDIC codepages) 39 * 40 * This works because the compiler usually runs on a platform where the execution 41 * charset includes all of the invariant characters at their expected 42 * code positions, so that the char * string literals in ICU code match 43 * the char literals here. 44 * 45 * Note that the set of lowercase Latin letters is discontiguous in EBCDIC 46 * and the set of uppercase Latin letters is discontiguous as well. 47 */ 48 49U_CAPI UBool U_EXPORT2 50uprv_isASCIILetter(char c) { 51#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY 52 return 53 ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') || 54 ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z'); 55#else 56 return ('a'<=c && c<='z') || ('A'<=c && c<='Z'); 57#endif 58} 59 60U_CAPI char U_EXPORT2 61uprv_toupper(char c) { 62#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY 63 if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) { 64 c=(char)(c+('A'-'a')); 65 } 66#else 67 if('a'<=c && c<='z') { 68 c=(char)(c+('A'-'a')); 69 } 70#endif 71 return c; 72} 73 74 75#if 0 76/* 77 * Commented out because cstring.h defines uprv_tolower() to be 78 * the same as either uprv_asciitolower() or uprv_ebcdictolower() 79 * to reduce the amount of code to cover with tests. 80 * 81 * Note that this uprv_tolower() definition is likely to work for most 82 * charset families, not just ASCII and EBCDIC, because its #else branch 83 * is written generically. 84 */ 85U_CAPI char U_EXPORT2 86uprv_tolower(char c) { 87#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY 88 if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) { 89 c=(char)(c+('a'-'A')); 90 } 91#else 92 if('A'<=c && c<='Z') { 93 c=(char)(c+('a'-'A')); 94 } 95#endif 96 return c; 97} 98#endif 99 100U_CAPI char U_EXPORT2 101uprv_asciitolower(char c) { 102 if(0x41<=c && c<=0x5a) { 103 c=(char)(c+0x20); 104 } 105 return c; 106} 107 108U_CAPI char U_EXPORT2 109uprv_ebcdictolower(char c) { 110 if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) || 111 (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) || 112 (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9) 113 ) { 114 c=(char)(c-0x40); 115 } 116 return c; 117} 118 119 120U_CAPI char* U_EXPORT2 121T_CString_toLowerCase(char* str) 122{ 123 char* origPtr = str; 124 125 if (str) { 126 do 127 *str = (char)uprv_tolower(*str); 128 while (*(str++)); 129 } 130 131 return origPtr; 132} 133 134U_CAPI char* U_EXPORT2 135T_CString_toUpperCase(char* str) 136{ 137 char* origPtr = str; 138 139 if (str) { 140 do 141 *str = (char)uprv_toupper(*str); 142 while (*(str++)); 143 } 144 145 return origPtr; 146} 147 148/* 149 * Takes a int32_t and fills in a char* string with that number "radix"-based. 150 * Does not handle negative values (makes an empty string for them). 151 * Writes at most 12 chars ("-2147483647" plus NUL). 152 * Returns the length of the string (not including the NUL). 153 */ 154U_CAPI int32_t U_EXPORT2 155T_CString_integerToString(char* buffer, int32_t v, int32_t radix) 156{ 157 char tbuf[30]; 158 int32_t tbx = sizeof(tbuf); 159 uint8_t digit; 160 int32_t length = 0; 161 uint32_t uval; 162 163 U_ASSERT(radix>=2 && radix<=16); 164 uval = (uint32_t) v; 165 if(v<0 && radix == 10) { 166 /* Only in base 10 do we conside numbers to be signed. */ 167 uval = (uint32_t)(-v); 168 buffer[length++] = '-'; 169 } 170 171 tbx = sizeof(tbuf)-1; 172 tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */ 173 do { 174 digit = (uint8_t)(uval % radix); 175 tbuf[--tbx] = (char)(T_CString_itosOffset(digit)); 176 uval = uval / radix; 177 } while (uval != 0); 178 179 /* copy converted number into user buffer */ 180 uprv_strcpy(buffer+length, tbuf+tbx); 181 length += sizeof(tbuf) - tbx -1; 182 return length; 183} 184 185 186 187/* 188 * Takes a int64_t and fills in a char* string with that number "radix"-based. 189 * Writes at most 21: chars ("-9223372036854775807" plus NUL). 190 * Returns the length of the string, not including the terminating NULL. 191 */ 192U_CAPI int32_t U_EXPORT2 193T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix) 194{ 195 char tbuf[30]; 196 int32_t tbx = sizeof(tbuf); 197 uint8_t digit; 198 int32_t length = 0; 199 uint64_t uval; 200 201 U_ASSERT(radix>=2 && radix<=16); 202 uval = (uint64_t) v; 203 if(v<0 && radix == 10) { 204 /* Only in base 10 do we conside numbers to be signed. */ 205 uval = (uint64_t)(-v); 206 buffer[length++] = '-'; 207 } 208 209 tbx = sizeof(tbuf)-1; 210 tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */ 211 do { 212 digit = (uint8_t)(uval % radix); 213 tbuf[--tbx] = (char)(T_CString_itosOffset(digit)); 214 uval = uval / radix; 215 } while (uval != 0); 216 217 /* copy converted number into user buffer */ 218 uprv_strcpy(buffer+length, tbuf+tbx); 219 length += sizeof(tbuf) - tbx -1; 220 return length; 221} 222 223 224U_CAPI int32_t U_EXPORT2 225T_CString_stringToInteger(const char *integerString, int32_t radix) 226{ 227 char *end; 228 return uprv_strtoul(integerString, &end, radix); 229 230} 231 232U_CAPI int U_EXPORT2 233uprv_stricmp(const char *str1, const char *str2) { 234 if(str1==NULL) { 235 if(str2==NULL) { 236 return 0; 237 } else { 238 return -1; 239 } 240 } else if(str2==NULL) { 241 return 1; 242 } else { 243 /* compare non-NULL strings lexically with lowercase */ 244 int rc; 245 unsigned char c1, c2; 246 247 for(;;) { 248 c1=(unsigned char)*str1; 249 c2=(unsigned char)*str2; 250 if(c1==0) { 251 if(c2==0) { 252 return 0; 253 } else { 254 return -1; 255 } 256 } else if(c2==0) { 257 return 1; 258 } else { 259 /* compare non-zero characters with lowercase */ 260 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2); 261 if(rc!=0) { 262 return rc; 263 } 264 } 265 ++str1; 266 ++str2; 267 } 268 } 269} 270 271U_CAPI int U_EXPORT2 272uprv_strnicmp(const char *str1, const char *str2, uint32_t n) { 273 if(str1==NULL) { 274 if(str2==NULL) { 275 return 0; 276 } else { 277 return -1; 278 } 279 } else if(str2==NULL) { 280 return 1; 281 } else { 282 /* compare non-NULL strings lexically with lowercase */ 283 int rc; 284 unsigned char c1, c2; 285 286 for(; n--;) { 287 c1=(unsigned char)*str1; 288 c2=(unsigned char)*str2; 289 if(c1==0) { 290 if(c2==0) { 291 return 0; 292 } else { 293 return -1; 294 } 295 } else if(c2==0) { 296 return 1; 297 } else { 298 /* compare non-zero characters with lowercase */ 299 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2); 300 if(rc!=0) { 301 return rc; 302 } 303 } 304 ++str1; 305 ++str2; 306 } 307 } 308 309 return 0; 310} 311 312U_CAPI char* U_EXPORT2 313uprv_strdup(const char *src) { 314 size_t len = uprv_strlen(src) + 1; 315 char *dup = (char *) uprv_malloc(len); 316 317 if (dup) { 318 uprv_memcpy(dup, src, len); 319 } 320 321 return dup; 322} 323 324U_CAPI char* U_EXPORT2 325uprv_strndup(const char *src, int32_t n) { 326 char *dup; 327 328 if(n < 0) { 329 dup = uprv_strdup(src); 330 } else { 331 dup = (char*)uprv_malloc(n+1); 332 if (dup) { 333 uprv_memcpy(dup, src, n); 334 dup[n] = 0; 335 } 336 } 337 338 return dup; 339} 340