/* ustrcase.c revision b13da9df870a61b11249bf741347908dbea0edd8 */
/*
*******************************************************************************
*
*   Copyright (C) 2001-2007, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  ustrcase.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2002feb20
*   created by: Markus W. Scherer
*
*   Implementation file for string casing C API functions.
*   Uses functions from uchar.c for basic functionality that requires access
*   to the Unicode Character Database (uprops.dat).
19b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project*/ 20b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project 21b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project#include "unicode/utypes.h" 22b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project#include "unicode/uloc.h" 23b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project#include "unicode/ustring.h" 24b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project#include "unicode/ucasemap.h" 25b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project#include "unicode/ubrk.h" 26b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project#include "cmemory.h" 27b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project#include "ucase.h" 28b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project#include "unormimp.h" 29b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project#include "ustr_imp.h" 30b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project 31b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project/* string casing ------------------------------------------------------------ */ 32b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project 33b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project/* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */ 34b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Projectstatic U_INLINE int32_t 35b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source ProjectappendResult(UChar *dest, int32_t destIndex, int32_t destCapacity, 36b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project int32_t result, const UChar *s) { 37b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project UChar32 c; 38b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project int32_t length; 
39b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project 40b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project /* decode the result */ 41b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project if(result<0) { 42b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project /* (not) original code point */ 43b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project c=~result; 44b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project length=-1; 45b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project } else if(result<=UCASE_MAX_STRING_LENGTH) { 46b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project c=U_SENTINEL; 47b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project length=result; 48d07d5a72938fd52415368c2320fc29575ae9a0c3Elliott Hughes } else { 49b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project c=result; 50b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project length=-1; 51b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project } 52d07d5a72938fd52415368c2320fc29575ae9a0c3Elliott Hughes 53b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project if(destIndex<destCapacity) { 54b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project /* append the result */ 55b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project if(length<0) { 56b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project /* code point */ 57b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project UBool isError=FALSE; 58b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project U16_APPEND(dest, destIndex, destCapacity, c, isError); 59b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project if(isError) { 60b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project /* overflow, nothing written */ 
61b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project destIndex+=U16_LENGTH(c); 62b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project } 63b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project } else { 64b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project /* string */ 65b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project if((destIndex+length)<=destCapacity) { 66b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project while(length>0) { 67b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project dest[destIndex++]=*s++; 68b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project --length; 69b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project } 70b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project } else { 71b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project /* overflow */ 72b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project destIndex+=length; 73b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project } 74b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project } 75b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project } else { 76b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project /* preflight */ 77b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project if(length<0) { 78b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project destIndex+=U16_LENGTH(c); 79b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project } else { 80b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project destIndex+=length; 81b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project } 82b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project } 83b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project return 
destIndex; 84b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project} 85b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project 86b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Projectstatic UChar32 U_CALLCONV 87b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Projectutf16_caseContextIterator(void *context, int8_t dir) { 88b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project UCaseContext *csc=(UCaseContext *)context; 89b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project UChar32 c; 90b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project 91b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project if(dir<0) { 92b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project /* reset for backward iteration */ 93b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project csc->index=csc->cpStart; 94b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project csc->dir=dir; 95b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project } else if(dir>0) { 96b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project /* reset for forward iteration */ 97b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project csc->index=csc->cpLimit; 98b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project csc->dir=dir; 99b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project } else { 100b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project /* continue current iteration direction */ 101b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project dir=csc->dir; 102b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project } 103b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project 104b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project if(dir<0) { 
105b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project if(csc->start<csc->index) { 106b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project U16_PREV((const UChar *)csc->p, csc->start, csc->index, c); 107b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project return c; 108b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project } 109b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project } else { 110b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project if(csc->index<csc->limit) { 111b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project U16_NEXT((const UChar *)csc->p, csc->index, csc->limit, c); 112b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project return c; 113b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project } 114b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project } 115b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project return U_SENTINEL; 116b80e287d54a028e744f9fe412840a89ec7f8084bThe Android Open Source Project} 117 118/* 119 * Case-maps [srcStart..srcLimit[ but takes 120 * context [0..srcLength[ into account. 121 */ 122static int32_t 123_caseMap(const UCaseMap *csm, UCaseMapFull *map, 124 UChar *dest, int32_t destCapacity, 125 const UChar *src, UCaseContext *csc, 126 int32_t srcStart, int32_t srcLimit, 127 UErrorCode *pErrorCode) { 128 const UChar *s; 129 UChar32 c, c2; 130 int32_t srcIndex, destIndex; 131 int32_t locCache; 132 133 locCache=csm->locCache; 134 135 /* case mapping loop */ 136 srcIndex=srcStart; 137 destIndex=0; 138 while(srcIndex<srcLimit) { 139 csc->cpStart=srcIndex; 140 U16_NEXT(src, srcIndex, srcLimit, c); 141 csc->cpLimit=srcIndex; 142 c=map(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &locCache); 143 if((destIndex<destCapacity) && (c<0 ? 
(c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) { 144 /* fast path version of appendResult() for BMP results */ 145 dest[destIndex++]=(UChar)c2; 146 } else { 147 destIndex=appendResult(dest, destIndex, destCapacity, c, s); 148 } 149 } 150 151 if(destIndex>destCapacity) { 152 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 153 } 154 return destIndex; 155} 156 157static void 158setTempCaseMapLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) { 159 /* 160 * We could call ucasemap_setLocale(), but here we really only care about 161 * the initial language subtag, we need not return the real string via 162 * ucasemap_getLocale(), and we don't care about only getting "x" from 163 * "x-some-thing" etc. 164 * 165 * We ignore locales with a longer-than-3 initial subtag. 166 * 167 * We also do not fill in the locCache because it is rarely used, 168 * and not worth setting unless we reuse it for many case mapping operations. 169 * (That's why UCaseMap was created.) 170 */ 171 int i; 172 char c; 173 174 /* the internal functions require locale!=NULL */ 175 if(locale==NULL) { 176 locale=uloc_getDefault(); 177 } 178 for(i=0; i<4 && (c=locale[i])!=0 && c!='-' && c!='_'; ++i) { 179 csm->locale[i]=c; 180 } 181 if(i<=3) { 182 csm->locale[i]=0; /* Up to 3 non-separator characters. */ 183 } else { 184 csm->locale[0]=0; /* Longer-than-3 initial subtag: Ignore. */ 185 } 186} 187 188/* 189 * Set parameters on an empty UCaseMap, for UCaseMap-less API functions. 190 * Do this fast because it is called with every function call. 
191 */ 192static U_INLINE void 193setTempCaseMap(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) { 194 if(csm->csp==NULL) { 195 csm->csp=ucase_getSingleton(pErrorCode); 196 if(U_FAILURE(*pErrorCode)) { 197 return; 198 } 199 } 200 if(locale!=NULL && locale[0]==0) { 201 csm->locale[0]=0; 202 } else { 203 setTempCaseMapLocale(csm, locale, pErrorCode); 204 } 205} 206 207#if !UCONFIG_NO_BREAK_ITERATION 208 209/* 210 * Internal titlecasing function. 211 */ 212static int32_t 213_toTitle(UCaseMap *csm, 214 UChar *dest, int32_t destCapacity, 215 const UChar *src, UCaseContext *csc, 216 int32_t srcLength, 217 UErrorCode *pErrorCode) { 218 const UChar *s; 219 UChar32 c; 220 int32_t prev, titleStart, titleLimit, index, destIndex, length; 221 UBool isFirstIndex; 222 223 if(csm->iter!=NULL) { 224 ubrk_setText(csm->iter, src, srcLength, pErrorCode); 225 } else { 226 csm->iter=ubrk_open(UBRK_WORD, csm->locale, 227 src, srcLength, 228 pErrorCode); 229 } 230 if(U_FAILURE(*pErrorCode)) { 231 return 0; 232 } 233 234 /* set up local variables */ 235 destIndex=0; 236 prev=0; 237 isFirstIndex=TRUE; 238 239 /* titlecasing loop */ 240 while(prev<srcLength) { 241 /* find next index where to titlecase */ 242 if(isFirstIndex) { 243 isFirstIndex=FALSE; 244 index=ubrk_first(csm->iter); 245 } else { 246 index=ubrk_next(csm->iter); 247 } 248 if(index==UBRK_DONE || index>srcLength) { 249 index=srcLength; 250 } 251 252 /* 253 * Unicode 4 & 5 section 3.13 Default Case Operations: 254 * 255 * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex 256 * #29, "Text Boundaries." Between each pair of word boundaries, find the first 257 * cased character F. If F exists, map F to default_title(F); then map each 258 * subsequent character C to default_lower(C). 
259 * 260 * In this implementation, segment [prev..index[ into 3 parts: 261 * a) uncased characters (copy as-is) [prev..titleStart[ 262 * b) first case letter (titlecase) [titleStart..titleLimit[ 263 * c) subsequent characters (lowercase) [titleLimit..index[ 264 */ 265 if(prev<index) { 266 /* find and copy uncased characters [prev..titleStart[ */ 267 titleStart=titleLimit=prev; 268 U16_NEXT(src, titleLimit, index, c); 269 if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) { 270 /* Adjust the titlecasing index (titleStart) to the next cased character. */ 271 for(;;) { 272 titleStart=titleLimit; 273 if(titleLimit==index) { 274 /* 275 * only uncased characters in [prev..index[ 276 * stop with titleStart==titleLimit==index 277 */ 278 break; 279 } 280 U16_NEXT(src, titleLimit, index, c); 281 if(UCASE_NONE!=ucase_getType(csm->csp, c)) { 282 break; /* cased letter at [titleStart..titleLimit[ */ 283 } 284 } 285 length=titleStart-prev; 286 if(length>0) { 287 if((destIndex+length)<=destCapacity) { 288 uprv_memcpy(dest+destIndex, src+prev, length*U_SIZEOF_UCHAR); 289 } 290 destIndex+=length; 291 } 292 } 293 294 if(titleStart<titleLimit) { 295 /* titlecase c which is from [titleStart..titleLimit[ */ 296 csc->cpStart=titleStart; 297 csc->cpLimit=titleLimit; 298 c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &csm->locCache); 299 destIndex=appendResult(dest, destIndex, destCapacity, c, s); 300 301 /* lowercase [titleLimit..index[ */ 302 if(titleLimit<index) { 303 if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) { 304 /* Normal operation: Lowercase the rest of the word. */ 305 destIndex+= 306 _caseMap( 307 csm, ucase_toFullLower, 308 dest+destIndex, destCapacity-destIndex, 309 src, csc, 310 titleLimit, index, 311 pErrorCode); 312 } else { 313 /* Optionally just copy the rest of the word unchanged. 
*/ 314 length=index-titleLimit; 315 if((destIndex+length)<=destCapacity) { 316 uprv_memcpy(dest+destIndex, src+titleLimit, length*U_SIZEOF_UCHAR); 317 } 318 destIndex+=length; 319 } 320 } 321 } 322 } 323 324 prev=index; 325 } 326 327 if(destIndex>destCapacity) { 328 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 329 } 330 return destIndex; 331} 332 333#endif 334 335/* functions available in the common library (for unistr_case.cpp) */ 336 337U_CFUNC int32_t 338ustr_toLower(const UCaseProps *csp, 339 UChar *dest, int32_t destCapacity, 340 const UChar *src, int32_t srcLength, 341 const char *locale, 342 UErrorCode *pErrorCode) { 343 UCaseMap csm={ NULL }; 344 UCaseContext csc={ NULL }; 345 346 csm.csp=csp; 347 setTempCaseMap(&csm, locale, pErrorCode); 348 csc.p=(void *)src; 349 csc.limit=srcLength; 350 351 return _caseMap(&csm, ucase_toFullLower, 352 dest, destCapacity, 353 src, &csc, 0, srcLength, 354 pErrorCode); 355} 356 357U_CFUNC int32_t 358ustr_toUpper(const UCaseProps *csp, 359 UChar *dest, int32_t destCapacity, 360 const UChar *src, int32_t srcLength, 361 const char *locale, 362 UErrorCode *pErrorCode) { 363 UCaseMap csm={ NULL }; 364 UCaseContext csc={ NULL }; 365 366 csm.csp=csp; 367 setTempCaseMap(&csm, locale, pErrorCode); 368 csc.p=(void *)src; 369 csc.limit=srcLength; 370 371 return _caseMap(&csm, ucase_toFullUpper, 372 dest, destCapacity, 373 src, &csc, 0, srcLength, 374 pErrorCode); 375} 376 377#if !UCONFIG_NO_BREAK_ITERATION 378 379U_CFUNC int32_t 380ustr_toTitle(const UCaseProps *csp, 381 UChar *dest, int32_t destCapacity, 382 const UChar *src, int32_t srcLength, 383 UBreakIterator *titleIter, 384 const char *locale, uint32_t options, 385 UErrorCode *pErrorCode) { 386 UCaseMap csm={ NULL }; 387 UCaseContext csc={ NULL }; 388 int32_t length; 389 390 csm.csp=csp; 391 csm.iter=titleIter; 392 csm.options=options; 393 setTempCaseMap(&csm, locale, pErrorCode); 394 csc.p=(void *)src; 395 csc.limit=srcLength; 396 397 length=_toTitle(&csm, 398 dest, destCapacity, 399 
src, &csc, srcLength, 400 pErrorCode); 401 if(titleIter==NULL && csm.iter!=NULL) { 402 ubrk_close(csm.iter); 403 } 404 return length; 405} 406 407#endif 408 409U_CFUNC int32_t 410ustr_foldCase(const UCaseProps *csp, 411 UChar *dest, int32_t destCapacity, 412 const UChar *src, int32_t srcLength, 413 uint32_t options, 414 UErrorCode *pErrorCode) { 415 int32_t srcIndex, destIndex; 416 417 const UChar *s; 418 UChar32 c, c2; 419 420 /* case mapping loop */ 421 srcIndex=destIndex=0; 422 while(srcIndex<srcLength) { 423 U16_NEXT(src, srcIndex, srcLength, c); 424 c=ucase_toFullFolding(csp, c, &s, options); 425 if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) { 426 /* fast path version of appendResult() for BMP results */ 427 dest[destIndex++]=(UChar)c2; 428 } else { 429 destIndex=appendResult(dest, destIndex, destCapacity, c, s); 430 } 431 } 432 433 if(destIndex>destCapacity) { 434 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 435 } 436 return destIndex; 437} 438 439/* 440 * Implement argument checking and buffer handling 441 * for string case mapping as a common function. 
442 */ 443 444/* common internal function for public API functions */ 445 446static int32_t 447caseMap(const UCaseMap *csm, 448 UChar *dest, int32_t destCapacity, 449 const UChar *src, int32_t srcLength, 450 int32_t toWhichCase, 451 UErrorCode *pErrorCode) { 452 UChar buffer[300]; 453 UChar *temp; 454 455 int32_t destLength; 456 457 /* check argument values */ 458 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 459 return 0; 460 } 461 if( destCapacity<0 || 462 (dest==NULL && destCapacity>0) || 463 src==NULL || 464 srcLength<-1 465 ) { 466 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 467 return 0; 468 } 469 470 /* get the string length */ 471 if(srcLength==-1) { 472 srcLength=u_strlen(src); 473 } 474 475 /* check for overlapping source and destination */ 476 if( dest!=NULL && 477 ((src>=dest && src<(dest+destCapacity)) || 478 (dest>=src && dest<(src+srcLength))) 479 ) { 480 /* overlap: provide a temporary destination buffer and later copy the result */ 481 if(destCapacity<=(sizeof(buffer)/U_SIZEOF_UCHAR)) { 482 /* the stack buffer is large enough */ 483 temp=buffer; 484 } else { 485 /* allocate a buffer */ 486 temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR); 487 if(temp==NULL) { 488 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 489 return 0; 490 } 491 } 492 } else { 493 temp=dest; 494 } 495 496 destLength=0; 497 498 if(toWhichCase==FOLD_CASE) { 499 destLength=ustr_foldCase(csm->csp, temp, destCapacity, src, srcLength, 500 csm->options, pErrorCode); 501 } else { 502 UCaseContext csc={ NULL }; 503 504 csc.p=(void *)src; 505 csc.limit=srcLength; 506 507 if(toWhichCase==TO_LOWER) { 508 destLength=_caseMap(csm, ucase_toFullLower, 509 temp, destCapacity, 510 src, &csc, 511 0, srcLength, 512 pErrorCode); 513 } else if(toWhichCase==TO_UPPER) { 514 destLength=_caseMap(csm, ucase_toFullUpper, 515 temp, destCapacity, 516 src, &csc, 517 0, srcLength, 518 pErrorCode); 519 } else /* if(toWhichCase==TO_TITLE) */ { 520#if UCONFIG_NO_BREAK_ITERATION 521 *pErrorCode=U_UNSUPPORTED_ERROR; 
522#else 523 /* UCaseMap is actually non-const in toTitle() APIs. */ 524 destLength=_toTitle((UCaseMap *)csm, temp, destCapacity, 525 src, &csc, srcLength, 526 pErrorCode); 527#endif 528 } 529 } 530 if(temp!=dest) { 531 /* copy the result string to the destination buffer */ 532 if(destLength>0) { 533 int32_t copyLength= destLength<=destCapacity ? destLength : destCapacity; 534 if(copyLength>0) { 535 uprv_memmove(dest, temp, copyLength*U_SIZEOF_UCHAR); 536 } 537 } 538 if(temp!=buffer) { 539 uprv_free(temp); 540 } 541 } 542 543 return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); 544} 545 546/* public API functions */ 547 548U_CAPI int32_t U_EXPORT2 549u_strToLower(UChar *dest, int32_t destCapacity, 550 const UChar *src, int32_t srcLength, 551 const char *locale, 552 UErrorCode *pErrorCode) { 553 UCaseMap csm={ NULL }; 554 setTempCaseMap(&csm, locale, pErrorCode); 555 return caseMap(&csm, 556 dest, destCapacity, 557 src, srcLength, 558 TO_LOWER, pErrorCode); 559} 560 561U_CAPI int32_t U_EXPORT2 562u_strToUpper(UChar *dest, int32_t destCapacity, 563 const UChar *src, int32_t srcLength, 564 const char *locale, 565 UErrorCode *pErrorCode) { 566 UCaseMap csm={ NULL }; 567 setTempCaseMap(&csm, locale, pErrorCode); 568 return caseMap(&csm, 569 dest, destCapacity, 570 src, srcLength, 571 TO_UPPER, pErrorCode); 572} 573 574#if !UCONFIG_NO_BREAK_ITERATION 575 576U_CAPI int32_t U_EXPORT2 577u_strToTitle(UChar *dest, int32_t destCapacity, 578 const UChar *src, int32_t srcLength, 579 UBreakIterator *titleIter, 580 const char *locale, 581 UErrorCode *pErrorCode) { 582 UCaseMap csm={ NULL }; 583 int32_t length; 584 585 csm.iter=titleIter; 586 setTempCaseMap(&csm, locale, pErrorCode); 587 length=caseMap(&csm, 588 dest, destCapacity, 589 src, srcLength, 590 TO_TITLE, pErrorCode); 591 if(titleIter==NULL && csm.iter!=NULL) { 592 ubrk_close(csm.iter); 593 } 594 return length; 595} 596 597U_CAPI int32_t U_EXPORT2 598ucasemap_toTitle(UCaseMap *csm, 599 UChar *dest, 
int32_t destCapacity, 600 const UChar *src, int32_t srcLength, 601 UErrorCode *pErrorCode) { 602 return caseMap(csm, 603 dest, destCapacity, 604 src, srcLength, 605 TO_TITLE, pErrorCode); 606} 607 608#endif 609 610U_CAPI int32_t U_EXPORT2 611u_strFoldCase(UChar *dest, int32_t destCapacity, 612 const UChar *src, int32_t srcLength, 613 uint32_t options, 614 UErrorCode *pErrorCode) { 615 UCaseMap csm={ NULL }; 616 csm.csp=ucase_getSingleton(pErrorCode); 617 csm.options=options; 618 return caseMap(&csm, 619 dest, destCapacity, 620 src, srcLength, 621 FOLD_CASE, pErrorCode); 622} 623 624/* case-insensitive string comparisons -------------------------------------- */ 625 626/* 627 * This function is a copy of unorm_cmpEquivFold() minus the parts for 628 * canonical equivalence. 629 * Keep the functions in sync, and see there for how this works. 630 * The duplication is for modularization: 631 * It makes caseless (but not canonical caseless) matches independent of 632 * the normalization code. 
633 */ 634 635/* stack element for previous-level source/decomposition pointers */ 636struct CmpEquivLevel { 637 const UChar *start, *s, *limit; 638}; 639typedef struct CmpEquivLevel CmpEquivLevel; 640 641/* internal function */ 642U_CFUNC int32_t 643u_strcmpFold(const UChar *s1, int32_t length1, 644 const UChar *s2, int32_t length2, 645 uint32_t options, 646 UErrorCode *pErrorCode) { 647 const UCaseProps *csp; 648 649 /* current-level start/limit - s1/s2 as current */ 650 const UChar *start1, *start2, *limit1, *limit2; 651 652 /* case folding variables */ 653 const UChar *p; 654 int32_t length; 655 656 /* stacks of previous-level start/current/limit */ 657 CmpEquivLevel stack1[2], stack2[2]; 658 659 /* case folding buffers, only use current-level start/limit */ 660 UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1]; 661 662 /* track which is the current level per string */ 663 int32_t level1, level2; 664 665 /* current code units, and code points for lookups */ 666 UChar32 c1, c2, cp1, cp2; 667 668 /* no argument error checking because this itself is not an API */ 669 670 /* 671 * assume that at least the option U_COMPARE_IGNORE_CASE is set 672 * otherwise this function would have to behave exactly as uprv_strCompare() 673 */ 674 csp=ucase_getSingleton(pErrorCode); 675 if(U_FAILURE(*pErrorCode)) { 676 return 0; 677 } 678 679 /* initialize */ 680 start1=s1; 681 if(length1==-1) { 682 limit1=NULL; 683 } else { 684 limit1=s1+length1; 685 } 686 687 start2=s2; 688 if(length2==-1) { 689 limit2=NULL; 690 } else { 691 limit2=s2+length2; 692 } 693 694 level1=level2=0; 695 c1=c2=-1; 696 697 /* comparison loop */ 698 for(;;) { 699 /* 700 * here a code unit value of -1 means "get another code unit" 701 * below it will mean "this source is finished" 702 */ 703 704 if(c1<0) { 705 /* get next code unit from string 1, post-increment */ 706 for(;;) { 707 if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) { 708 if(level1==0) { 709 
c1=-1; 710 break; 711 } 712 } else { 713 ++s1; 714 break; 715 } 716 717 /* reached end of level buffer, pop one level */ 718 do { 719 --level1; 720 start1=stack1[level1].start; 721 } while(start1==NULL); 722 s1=stack1[level1].s; 723 limit1=stack1[level1].limit; 724 } 725 } 726 727 if(c2<0) { 728 /* get next code unit from string 2, post-increment */ 729 for(;;) { 730 if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) { 731 if(level2==0) { 732 c2=-1; 733 break; 734 } 735 } else { 736 ++s2; 737 break; 738 } 739 740 /* reached end of level buffer, pop one level */ 741 do { 742 --level2; 743 start2=stack2[level2].start; 744 } while(start2==NULL); 745 s2=stack2[level2].s; 746 limit2=stack2[level2].limit; 747 } 748 } 749 750 /* 751 * compare c1 and c2 752 * either variable c1, c2 is -1 only if the corresponding string is finished 753 */ 754 if(c1==c2) { 755 if(c1<0) { 756 return 0; /* c1==c2==-1 indicating end of strings */ 757 } 758 c1=c2=-1; /* make us fetch new code units */ 759 continue; 760 } else if(c1<0) { 761 return -1; /* string 1 ends before string 2 */ 762 } else if(c2<0) { 763 return 1; /* string 2 ends before string 1 */ 764 } 765 /* c1!=c2 && c1>=0 && c2>=0 */ 766 767 /* get complete code points for c1, c2 for lookups if either is a surrogate */ 768 cp1=c1; 769 if(U_IS_SURROGATE(c1)) { 770 UChar c; 771 772 if(U_IS_SURROGATE_LEAD(c1)) { 773 if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) { 774 /* advance ++s1; only below if cp1 decomposes/case-folds */ 775 cp1=U16_GET_SUPPLEMENTARY(c1, c); 776 } 777 } else /* isTrail(c1) */ { 778 if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) { 779 cp1=U16_GET_SUPPLEMENTARY(c, c1); 780 } 781 } 782 } 783 784 cp2=c2; 785 if(U_IS_SURROGATE(c2)) { 786 UChar c; 787 788 if(U_IS_SURROGATE_LEAD(c2)) { 789 if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) { 790 /* advance ++s2; only below if cp2 decomposes/case-folds */ 791 cp2=U16_GET_SUPPLEMENTARY(c2, c); 792 } 793 } else /* isTrail(c2) */ { 794 if(start2<=(s2-2) && 
U16_IS_LEAD(c=*(s2-2))) { 795 cp2=U16_GET_SUPPLEMENTARY(c, c2); 796 } 797 } 798 } 799 800 /* 801 * go down one level for each string 802 * continue with the main loop as soon as there is a real change 803 */ 804 805 if( level1==0 && 806 (length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0 807 ) { 808 /* cp1 case-folds to the code point "length" or to p[length] */ 809 if(U_IS_SURROGATE(c1)) { 810 if(U_IS_SURROGATE_LEAD(c1)) { 811 /* advance beyond source surrogate pair if it case-folds */ 812 ++s1; 813 } else /* isTrail(c1) */ { 814 /* 815 * we got a supplementary code point when hitting its trail surrogate, 816 * therefore the lead surrogate must have been the same as in the other string; 817 * compare this decomposition with the lead surrogate in the other string 818 * remember that this simulates bulk text replacement: 819 * the decomposition would replace the entire code point 820 */ 821 --s2; 822 c2=*(s2-1); 823 } 824 } 825 826 /* push current level pointers */ 827 stack1[0].start=start1; 828 stack1[0].s=s1; 829 stack1[0].limit=limit1; 830 ++level1; 831 832 /* copy the folding result to fold1[] */ 833 if(length<=UCASE_MAX_STRING_LENGTH) { 834 u_memcpy(fold1, p, length); 835 } else { 836 int32_t i=0; 837 U16_APPEND_UNSAFE(fold1, i, length); 838 length=i; 839 } 840 841 /* set next level pointers to case folding */ 842 start1=s1=fold1; 843 limit1=fold1+length; 844 845 /* get ready to read from decomposition, continue with loop */ 846 c1=-1; 847 continue; 848 } 849 850 if( level2==0 && 851 (length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0 852 ) { 853 /* cp2 case-folds to the code point "length" or to p[length] */ 854 if(U_IS_SURROGATE(c2)) { 855 if(U_IS_SURROGATE_LEAD(c2)) { 856 /* advance beyond source surrogate pair if it case-folds */ 857 ++s2; 858 } else /* isTrail(c2) */ { 859 /* 860 * we got a supplementary code point when hitting its trail surrogate, 861 * therefore the lead surrogate must have been the same as in the other string; 
862 * compare this decomposition with the lead surrogate in the other string 863 * remember that this simulates bulk text replacement: 864 * the decomposition would replace the entire code point 865 */ 866 --s1; 867 c1=*(s1-1); 868 } 869 } 870 871 /* push current level pointers */ 872 stack2[0].start=start2; 873 stack2[0].s=s2; 874 stack2[0].limit=limit2; 875 ++level2; 876 877 /* copy the folding result to fold2[] */ 878 if(length<=UCASE_MAX_STRING_LENGTH) { 879 u_memcpy(fold2, p, length); 880 } else { 881 int32_t i=0; 882 U16_APPEND_UNSAFE(fold2, i, length); 883 length=i; 884 } 885 886 /* set next level pointers to case folding */ 887 start2=s2=fold2; 888 limit2=fold2+length; 889 890 /* get ready to read from decomposition, continue with loop */ 891 c2=-1; 892 continue; 893 } 894 895 /* 896 * no decomposition/case folding, max level for both sides: 897 * return difference result 898 * 899 * code point order comparison must not just return cp1-cp2 900 * because when single surrogates are present then the surrogate pairs 901 * that formed cp1 and cp2 may be from different string indexes 902 * 903 * example: { d800 d800 dc01 } vs. 
{ d800 dc00 }, compare at second code units 904 * c1=d800 cp1=10001 c2=dc00 cp2=10000 905 * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 } 906 * 907 * therefore, use same fix-up as in ustring.c/uprv_strCompare() 908 * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++ 909 * so we have slightly different pointer/start/limit comparisons here 910 */ 911 912 if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) { 913 /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */ 914 if( 915 (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) || 916 (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2))) 917 ) { 918 /* part of a surrogate pair, leave >=d800 */ 919 } else { 920 /* BMP code point - may be surrogate code point - make <d800 */ 921 c1-=0x2800; 922 } 923 924 if( 925 (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) || 926 (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2))) 927 ) { 928 /* part of a surrogate pair, leave >=d800 */ 929 } else { 930 /* BMP code point - may be surrogate code point - make <d800 */ 931 c2-=0x2800; 932 } 933 } 934 935 return c1-c2; 936 } 937} 938 939/* public API functions */ 940 941U_CAPI int32_t U_EXPORT2 942u_strCaseCompare(const UChar *s1, int32_t length1, 943 const UChar *s2, int32_t length2, 944 uint32_t options, 945 UErrorCode *pErrorCode) { 946 /* argument checking */ 947 if(pErrorCode==0 || U_FAILURE(*pErrorCode)) { 948 return 0; 949 } 950 if(s1==NULL || length1<-1 || s2==NULL || length2<-1) { 951 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 952 return 0; 953 } 954 return u_strcmpFold(s1, length1, s2, length2, 955 options|U_COMPARE_IGNORE_CASE, 956 pErrorCode); 957} 958 959U_CAPI int32_t U_EXPORT2 960u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options) { 961 UErrorCode errorCode=U_ZERO_ERROR; 962 return u_strcmpFold(s1, -1, s2, -1, 963 options|U_COMPARE_IGNORE_CASE, 964 &errorCode); 965} 966 967U_CAPI int32_t U_EXPORT2 
968u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options) { 969 UErrorCode errorCode=U_ZERO_ERROR; 970 return u_strcmpFold(s1, length, s2, length, 971 options|U_COMPARE_IGNORE_CASE, 972 &errorCode); 973} 974 975U_CAPI int32_t U_EXPORT2 976u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) { 977 UErrorCode errorCode=U_ZERO_ERROR; 978 return u_strcmpFold(s1, n, s2, n, 979 options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE), 980 &errorCode); 981} 982