1/* 2****************************************************************************** 3* 4* Copyright (C) 1997-2003, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7****************************************************************************** 8* 9* File CSTRING.C 10* 11* @author Helena Shih 12* 13* Modification History: 14* 15* Date Name Description 16* 6/18/98 hshih Created 17* 09/08/98 stephen Added include for ctype, for Mac Port 18* 11/15/99 helena Integrated S/390 IEEE changes. 19****************************************************************************** 20*/ 21 22 23 24#include <stdlib.h> 25#include <stdio.h> 26#include "unicode/utypes.h" 27#include "cmemory.h" 28#include "cstring.h" 29#include "uassert.h" 30 31/* 32 * We hardcode case conversion for invariant characters to match our expectation 33 * and the compiler execution charset. 34 * This prevents problems on systems 35 * - with non-default casing behavior, like Turkish system locales where 36 * tolower('I') maps to dotless i and toupper('i') maps to dotted I 37 * - where there are no lowercase Latin characters at all, or using different 38 * codes (some old EBCDIC codepages) 39 * 40 * This works because the compiler usually runs on a platform where the execution 41 * charset includes all of the invariant characters at their expected 42 * code positions, so that the char * string literals in ICU code match 43 * the char literals here. 44 * 45 * Note that the set of lowercase Latin letters is discontiguous in EBCDIC 46 * and the set of uppercase Latin letters is discontiguous as well. 47 */ 48 49U_CAPI char U_EXPORT2 50uprv_toupper(char c) { 51#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY 52 if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) { 53 c=(char)(c+('A'-'a')); 54 } 55#else 56 if('a'<=c && c<='z') { 57 c=(char)(c+('A'-'a')); 58 } 59#endif 60 return c; 61} 62 63 64#if 0 65/* 66 * Commented out because cstring.h defines uprv_tolower() to be 67 * the same as either uprv_asciitolower() or uprv_ebcdictolower() 68 * to reduce the amount of code to cover with tests. 69 * 70 * Note that this uprv_tolower() definition is likely to work for most 71 * charset families, not just ASCII and EBCDIC, because its #else branch 72 * is written generically. 73 */ 74U_CAPI char U_EXPORT2 75uprv_tolower(char c) { 76#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY 77 if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) { 78 c=(char)(c+('a'-'A')); 79 } 80#else 81 if('A'<=c && c<='Z') { 82 c=(char)(c+('a'-'A')); 83 } 84#endif 85 return c; 86} 87#endif 88 89U_CAPI char U_EXPORT2 90uprv_asciitolower(char c) { 91 if(0x41<=c && c<=0x5a) { 92 c=(char)(c+0x20); 93 } 94 return c; 95} 96 97U_CAPI char U_EXPORT2 98uprv_ebcdictolower(char c) { 99 if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) || 100 (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) || 101 (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9) 102 ) { 103 c=(char)(c-0x40); 104 } 105 return c; 106} 107 108 109U_CAPI char* U_EXPORT2 110T_CString_toLowerCase(char* str) 111{ 112 char* origPtr = str; 113 114 if (str) { 115 do 116 *str = (char)uprv_tolower(*str); 117 while (*(str++)); 118 } 119 120 return origPtr; 121} 122 123U_CAPI char* U_EXPORT2 124T_CString_toUpperCase(char* str) 125{ 126 char* origPtr = str; 127 128 if (str) { 129 do 130 *str = (char)uprv_toupper(*str); 131 while (*(str++)); 132 } 133 134 return origPtr; 135} 136 137/* 138 * Takes a int32_t and fills in a char* string with that number "radix"-based. 139 * Does not handle negative values (makes an empty string for them). 140 * Writes at most 12 chars ("-2147483647" plus NUL). 141 * Returns the length of the string (not including the NUL). 142 */ 143U_CAPI int32_t U_EXPORT2 144T_CString_integerToString(char* buffer, int32_t v, int32_t radix) 145{ 146 char tbuf[30]; 147 int32_t tbx = sizeof(tbuf); 148 uint8_t digit; 149 int32_t length = 0; 150 uint32_t uval; 151 152 U_ASSERT(radix>=2 && radix<=16); 153 uval = (uint32_t) v; 154 if(v<0 && radix == 10) { 155 /* Only in base 10 do we conside numbers to be signed. */ 156 uval = (uint32_t)(-v); 157 buffer[length++] = '-'; 158 } 159 160 tbx = sizeof(tbuf)-1; 161 tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */ 162 do { 163 digit = (uint8_t)(uval % radix); 164 tbuf[--tbx] = (char)(T_CString_itosOffset(digit)); 165 uval = uval / radix; 166 } while (uval != 0); 167 168 /* copy converted number into user buffer */ 169 uprv_strcpy(buffer+length, tbuf+tbx); 170 length += sizeof(tbuf) - tbx -1; 171 return length; 172} 173 174 175 176/* 177 * Takes a int64_t and fills in a char* string with that number "radix"-based. 178 * Writes at most 21: chars ("-9223372036854775807" plus NUL). 179 * Returns the length of the string, not including the terminating NULL. 180 */ 181U_CAPI int32_t U_EXPORT2 182T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix) 183{ 184 char tbuf[30]; 185 int32_t tbx = sizeof(tbuf); 186 uint8_t digit; 187 int32_t length = 0; 188 uint64_t uval; 189 190 U_ASSERT(radix>=2 && radix<=16); 191 uval = (uint64_t) v; 192 if(v<0 && radix == 10) { 193 /* Only in base 10 do we conside numbers to be signed. */ 194 uval = (uint64_t)(-v); 195 buffer[length++] = '-'; 196 } 197 198 tbx = sizeof(tbuf)-1; 199 tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */ 200 do { 201 digit = (uint8_t)(uval % radix); 202 tbuf[--tbx] = (char)(T_CString_itosOffset(digit)); 203 uval = uval / radix; 204 } while (uval != 0); 205 206 /* copy converted number into user buffer */ 207 uprv_strcpy(buffer+length, tbuf+tbx); 208 length += sizeof(tbuf) - tbx -1; 209 return length; 210} 211 212 213U_CAPI int32_t U_EXPORT2 214T_CString_stringToInteger(const char *integerString, int32_t radix) 215{ 216 char *end; 217 return uprv_strtoul(integerString, &end, radix); 218 219} 220 221U_CAPI int U_EXPORT2 222T_CString_stricmp(const char *str1, const char *str2) { 223 if(str1==NULL) { 224 if(str2==NULL) { 225 return 0; 226 } else { 227 return -1; 228 } 229 } else if(str2==NULL) { 230 return 1; 231 } else { 232 /* compare non-NULL strings lexically with lowercase */ 233 int rc; 234 unsigned char c1, c2; 235 236 for(;;) { 237 c1=(unsigned char)*str1; 238 c2=(unsigned char)*str2; 239 if(c1==0) { 240 if(c2==0) { 241 return 0; 242 } else { 243 return -1; 244 } 245 } else if(c2==0) { 246 return 1; 247 } else { 248 /* compare non-zero characters with lowercase */ 249 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2); 250 if(rc!=0) { 251 return rc; 252 } 253 } 254 ++str1; 255 ++str2; 256 } 257 } 258} 259 260U_CAPI int U_EXPORT2 261T_CString_strnicmp(const char *str1, const char *str2, uint32_t n) { 262 if(str1==NULL) { 263 if(str2==NULL) { 264 return 0; 265 } else { 266 return -1; 267 } 268 } else if(str2==NULL) { 269 return 1; 270 } else { 271 /* compare non-NULL strings lexically with lowercase */ 272 int rc; 273 unsigned char c1, c2; 274 275 for(; n--;) { 276 c1=(unsigned char)*str1; 277 c2=(unsigned char)*str2; 278 if(c1==0) { 279 if(c2==0) { 280 return 0; 281 } else { 282 return -1; 283 } 284 } else if(c2==0) { 285 return 1; 286 } else { 287 /* compare non-zero characters with lowercase */ 288 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2); 289 if(rc!=0) { 290 return rc; 291 } 292 } 293 ++str1; 294 ++str2; 295 } 296 } 297 298 return 0; 299} 300 301U_CAPI char* U_EXPORT2 302uprv_strdup(const char *src) { 303 size_t len = uprv_strlen(src) + 1; 304 char *dup = (char *) uprv_malloc(len); 305 306 if (dup) { 307 uprv_memcpy(dup, src, len); 308 } 309 310 return dup; 311} 312 313U_CAPI char* U_EXPORT2 314uprv_strndup(const char *src, int32_t n) { 315 char *dup; 316 317 if(n < 0) { 318 dup = uprv_strdup(src); 319 } else { 320 dup = (char*)uprv_malloc(n+1); 321 if (dup) { 322 uprv_memcpy(dup, src, n); 323 dup[n] = 0; 324 } 325 } 326 327 return dup; 328} 329