1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* Copyright (C) 2001-2010, International Business Machines 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* file name: ustrcase.c 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* encoding: US-ASCII 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* tab size: 8 (not used) 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* indentation:4 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* created on: 2002feb20 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* created by: Markus W. Scherer 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Implementation file for string casing C API functions. 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Uses functions from uchar.c for basic functionality that requires access 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* to the Unicode Character Database (uprops.dat). 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uloc.h" 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h" 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucasemap.h" 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ubrk.h" 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucase.h" 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ustr_imp.h" 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* string casing ------------------------------------------------------------ */ 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */ 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic U_INLINE int32_t 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruappendResult(UChar *dest, int32_t destIndex, int32_t destCapacity, 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t result, const UChar *s) { 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length; 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* decode the result */ 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(result<0) { 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* (not) original code point */ 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=~result; 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=-1; 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(result<=UCASE_MAX_STRING_LENGTH) { 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=U_SENTINEL; 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=result; 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=result; 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=-1; 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(destIndex<destCapacity) { 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* append the result */ 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<0) { 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* code point */ 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isError=FALSE; 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_APPEND(dest, destIndex, destCapacity, c, isError); 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(isError) { 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* overflow, nothing written */ 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+=U16_LENGTH(c); 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* string */ 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((destIndex+length)<=destCapacity) { 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(length>0) { 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest[destIndex++]=*s++; 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --length; 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* overflow */ 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+=length; 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* preflight */ 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<0) { 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+=U16_LENGTH(c); 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+=length; 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return destIndex; 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UChar32 U_CALLCONV 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruutf16_caseContextIterator(void *context, int8_t dir) { 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseContext *csc=(UCaseContext *)context; 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(dir<0) { 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* reset for backward iteration */ 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->index=csc->cpStart; 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->dir=dir; 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(dir>0) { 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* reset for forward iteration */ 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->index=csc->cpLimit; 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->dir=dir; 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* continue current iteration direction */ 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dir=csc->dir; 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(dir<0) { 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(csc->start<csc->index) { 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_PREV((const UChar *)csc->p, csc->start, csc->index, c); 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return c; 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(csc->index<csc->limit) { 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_NEXT((const UChar *)csc->p, csc->index, csc->limit, c); 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return c; 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return U_SENTINEL; 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Case-maps [srcStart..srcLimit[ but takes 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * context [0..srcLength[ into account. 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru_caseMap(const UCaseMap *csm, UCaseMapFull *map, 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *dest, int32_t destCapacity, 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, UCaseContext *csc, 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcStart, int32_t srcLimit, 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s; 128b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 c, c2 = 0; 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcIndex, destIndex; 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t locCache; 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru locCache=csm->locCache; 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* case mapping loop */ 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcIndex=srcStart; 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex=0; 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(srcIndex<srcLimit) { 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->cpStart=srcIndex; 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_NEXT(src, srcIndex, srcLimit, c); 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->cpLimit=srcIndex; 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=map(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &locCache); 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) { 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fast path version of appendResult() for BMP results */ 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest[destIndex++]=(UChar)c2; 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex=appendResult(dest, destIndex, destCapacity, c, s); 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(destIndex>destCapacity) { 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return destIndex; 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerusetTempCaseMapLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) { 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * We could call ucasemap_setLocale(), but here we really only care about 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the initial language subtag, we need not return the real string via 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ucasemap_getLocale(), and we don't care about only getting "x" from 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * "x-some-thing" etc. 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * We ignore locales with a longer-than-3 initial subtag. 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * We also do not fill in the locCache because it is rarely used, 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and not worth setting unless we reuse it for many case mapping operations. 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (That's why UCaseMap was created.) 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i; 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char c; 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* the internal functions require locale!=NULL */ 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(locale==NULL) { 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru locale=uloc_getDefault(); 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<4 && (c=locale[i])!=0 && c!='-' && c!='_'; ++i) { 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm->locale[i]=c; 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(i<=3) { 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm->locale[i]=0; /* Up to 3 non-separator characters. */ 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm->locale[0]=0; /* Longer-than-3 initial subtag: Ignore. */ 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Set parameters on an empty UCaseMap, for UCaseMap-less API functions. 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Do this fast because it is called with every function call. 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic U_INLINE void 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerusetTempCaseMap(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) { 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(csm->csp==NULL) { 19427f654740f2a26ad62a5c155af9199af9e69b889claireho csm->csp=ucase_getSingleton(); 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(locale!=NULL && locale[0]==0) { 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm->locale[0]=0; 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru setTempCaseMapLocale(csm, locale, pErrorCode); 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Internal titlecasing function. 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru_toTitle(UCaseMap *csm, 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *dest, int32_t destCapacity, 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, UCaseContext *csc, 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcLength, 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s; 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 216b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t prev, titleStart, titleLimit, idx, destIndex, length; 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isFirstIndex; 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(csm->iter!=NULL) { 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ubrk_setText(csm->iter, src, srcLength, pErrorCode); 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm->iter=ubrk_open(UBRK_WORD, csm->locale, 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, srcLength, 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*pErrorCode)) { 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* set up local variables */ 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex=0; 232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prev=0; 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isFirstIndex=TRUE; 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* titlecasing loop */ 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(prev<srcLength) { 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* find next index where to titlecase */ 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(isFirstIndex) { 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isFirstIndex=FALSE; 240b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru idx=ubrk_first(csm->iter); 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 242b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru idx=ubrk_next(csm->iter); 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 244b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(idx==UBRK_DONE || idx>srcLength) { 245b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru idx=srcLength; 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Unicode 4 & 5 section 3.13 Default Case Operations: 250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * #29, "Text Boundaries." Between each pair of word boundaries, find the first 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * cased character F. If F exists, map F to default_title(F); then map each 254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * subsequent character C to default_lower(C). 255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * In this implementation, segment [prev..index[ into 3 parts: 257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a) uncased characters (copy as-is) [prev..titleStart[ 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * b) first case letter (titlecase) [titleStart..titleLimit[ 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * c) subsequent characters (lowercase) [titleLimit..index[ 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 261b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(prev<idx) { 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* find and copy uncased characters [prev..titleStart[ */ 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru titleStart=titleLimit=prev; 264b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U16_NEXT(src, titleLimit, idx, c); 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) { 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Adjust the titlecasing index (titleStart) to the next cased character. */ 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru titleStart=titleLimit; 269b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(titleLimit==idx) { 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * only uncased characters in [prev..index[ 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * stop with titleStart==titleLimit==index 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 276b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U16_NEXT(src, titleLimit, idx, c); 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(UCASE_NONE!=ucase_getType(csm->csp, c)) { 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; /* cased letter at [titleStart..titleLimit[ */ 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=titleStart-prev; 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length>0) { 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((destIndex+length)<=destCapacity) { 284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(dest+destIndex, src+prev, length*U_SIZEOF_UCHAR); 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+=length; 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(titleStart<titleLimit) { 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* titlecase c which is from [titleStart..titleLimit[ */ 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->cpStart=titleStart; 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->cpLimit=titleLimit; 294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &csm->locCache); 295c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru destIndex=appendResult(dest, destIndex, destCapacity, c, s); 296c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 297c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* Special case Dutch IJ titlecasing */ 298b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ( titleStart+1 < idx && 299c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucase_getCaseLocale(csm->locale,&csm->locCache) == UCASE_LOC_DUTCH && 300c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ( src[titleStart] == (UChar32) 0x0049 || src[titleStart] == (UChar32) 0x0069 ) && 301c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ( src[titleStart+1] == (UChar32) 0x004A || src[titleStart+1] == (UChar32) 0x006A )) { 302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c=(UChar32) 0x004A; 303c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru destIndex=appendResult(dest, destIndex, destCapacity, c, s); 304c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru titleLimit++; 305c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* lowercase [titleLimit..index[ */ 308b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(titleLimit<idx) { 309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) { 310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Normal operation: Lowercase the rest of the word. */ 311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+= 312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _caseMap( 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm, ucase_toFullLower, 314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest+destIndex, destCapacity-destIndex, 315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, csc, 316b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru titleLimit, idx, 317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Optionally just copy the rest of the word unchanged. */ 320b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru length=idx-titleLimit; 321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((destIndex+length)<=destCapacity) { 322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(dest+destIndex, src+titleLimit, length*U_SIZEOF_UCHAR); 323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+=length; 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 330b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru prev=idx; 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(destIndex>destCapacity) { 334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return destIndex; 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* functions available in the common library (for unistr_case.cpp) */ 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruustr_toLower(const UCaseProps *csp, 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *dest, int32_t destCapacity, 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, int32_t srcLength, 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *locale, 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseMap csm={ NULL }; 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseContext csc={ NULL }; 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm.csp=csp; 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru setTempCaseMap(&csm, locale, pErrorCode); 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc.p=(void *)src; 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc.limit=srcLength; 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return _caseMap(&csm, ucase_toFullLower, 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest, destCapacity, 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, &csc, 0, srcLength, 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruustr_toUpper(const UCaseProps *csp, 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *dest, int32_t destCapacity, 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, int32_t srcLength, 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *locale, 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseMap csm={ NULL }; 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseContext csc={ NULL }; 371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm.csp=csp; 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru setTempCaseMap(&csm, locale, pErrorCode); 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc.p=(void *)src; 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc.limit=srcLength; 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return _caseMap(&csm, ucase_toFullUpper, 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest, destCapacity, 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, &csc, 0, srcLength, 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruustr_toTitle(const UCaseProps *csp, 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *dest, int32_t destCapacity, 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, int32_t srcLength, 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBreakIterator *titleIter, 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *locale, uint32_t options, 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseMap csm={ NULL }; 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseContext csc={ NULL }; 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length; 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm.csp=csp; 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm.iter=titleIter; 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm.options=options; 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru setTempCaseMap(&csm, locale, pErrorCode); 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc.p=(void *)src; 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc.limit=srcLength; 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=_toTitle(&csm, 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest, destCapacity, 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, &csc, srcLength, 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(titleIter==NULL && csm.iter!=NULL) { 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ubrk_close(csm.iter); 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return length; 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruustr_foldCase(const UCaseProps *csp, 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *dest, int32_t destCapacity, 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, int32_t srcLength, 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t options, 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcIndex, destIndex; 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s; 424b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 c, c2 = 0; 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* case mapping loop */ 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcIndex=destIndex=0; 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(srcIndex<srcLength) { 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_NEXT(src, srcIndex, srcLength, c); 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=ucase_toFullFolding(csp, c, &s, options); 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) { 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fast path version of appendResult() for BMP results */ 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest[destIndex++]=(UChar)c2; 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex=appendResult(dest, destIndex, destCapacity, c, s); 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(destIndex>destCapacity) { 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return destIndex; 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Implement argument checking and buffer handling 447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * for string case mapping as a common function. 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* common internal function for public API functions */ 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerucaseMap(const UCaseMap *csm, 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *dest, int32_t destCapacity, 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, int32_t srcLength, 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t toWhichCase, 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar buffer[300]; 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *temp; 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destLength; 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* check argument values */ 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( destCapacity<0 || 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (dest==NULL && destCapacity>0) || 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src==NULL || 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcLength<-1 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* get the string length */ 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(srcLength==-1) { 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcLength=u_strlen(src); 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* check for overlapping source and destination */ 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( dest!=NULL && 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((src>=dest && src<(dest+destCapacity)) || 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (dest>=src && dest<(src+srcLength))) 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* overlap: provide a temporary destination buffer and later copy the result */ 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(destCapacity<=(sizeof(buffer)/U_SIZEOF_UCHAR)) { 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* the stack buffer is large enough */ 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru temp=buffer; 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* allocate a buffer */ 492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR); 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(temp==NULL) { 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru temp=dest; 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destLength=0; 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(toWhichCase==FOLD_CASE) { 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destLength=ustr_foldCase(csm->csp, temp, destCapacity, src, srcLength, 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm->options, pErrorCode); 507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseContext csc={ NULL }; 509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc.p=(void *)src; 511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc.limit=srcLength; 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(toWhichCase==TO_LOWER) { 514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destLength=_caseMap(csm, ucase_toFullLower, 515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru temp, destCapacity, 516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, &csc, 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0, srcLength, 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(toWhichCase==TO_UPPER) { 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destLength=_caseMap(csm, ucase_toFullUpper, 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru temp, destCapacity, 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, &csc, 523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0, srcLength, 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* if(toWhichCase==TO_TITLE) */ { 526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if UCONFIG_NO_BREAK_ITERATION 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_UNSUPPORTED_ERROR; 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#else 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* UCaseMap is actually non-const in toTitle() APIs. */ 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destLength=_toTitle((UCaseMap *)csm, temp, destCapacity, 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, &csc, srcLength, 532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(temp!=dest) { 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* copy the result string to the destination buffer */ 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(destLength>0) { 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t copyLength= destLength<=destCapacity ? destLength : destCapacity; 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(copyLength>0) { 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memmove(dest, temp, copyLength*U_SIZEOF_UCHAR); 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(temp!=buffer) { 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(temp); 546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); 550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* public API functions */ 553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strToLower(UChar *dest, int32_t destCapacity, 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, int32_t srcLength, 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *locale, 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseMap csm={ NULL }; 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru setTempCaseMap(&csm, locale, pErrorCode); 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return caseMap(&csm, 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest, destCapacity, 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, srcLength, 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TO_LOWER, pErrorCode); 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strToUpper(UChar *dest, int32_t destCapacity, 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, int32_t srcLength, 570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *locale, 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseMap csm={ NULL }; 573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru setTempCaseMap(&csm, locale, pErrorCode); 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return caseMap(&csm, 575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest, destCapacity, 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, srcLength, 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TO_UPPER, pErrorCode); 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION 581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strToTitle(UChar *dest, int32_t destCapacity, 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, int32_t srcLength, 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBreakIterator *titleIter, 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *locale, 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseMap csm={ NULL }; 589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length; 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm.iter=titleIter; 592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru setTempCaseMap(&csm, locale, pErrorCode); 593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=caseMap(&csm, 594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest, destCapacity, 595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, srcLength, 596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TO_TITLE, pErrorCode); 597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(titleIter==NULL && csm.iter!=NULL) { 598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ubrk_close(csm.iter); 599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return length; 601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucasemap_toTitle(UCaseMap *csm, 605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *dest, int32_t destCapacity, 606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, int32_t srcLength, 607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return caseMap(csm, 609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest, destCapacity, 610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, srcLength, 611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TO_TITLE, pErrorCode); 612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strFoldCase(UChar *dest, int32_t destCapacity, 618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, int32_t srcLength, 619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t options, 620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseMap csm={ NULL }; 62227f654740f2a26ad62a5c155af9199af9e69b889claireho csm.csp=ucase_getSingleton(); 623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm.options=options; 624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return caseMap(&csm, 625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest, destCapacity, 626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, srcLength, 627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru FOLD_CASE, pErrorCode); 628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* case-insensitive string comparisons -------------------------------------- */ 631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This function is a copy of unorm_cmpEquivFold() minus the parts for 634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * canonical equivalence. 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Keep the functions in sync, and see there for how this works. 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The duplication is for modularization: 637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * It makes caseless (but not canonical caseless) matches independent of 638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the normalization code. 639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* stack element for previous-level source/decomposition pointers */ 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustruct CmpEquivLevel { 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *start, *s, *limit; 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutypedef struct CmpEquivLevel CmpEquivLevel; 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* internal function */ 648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strcmpFold(const UChar *s1, int32_t length1, 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s2, int32_t length2, 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t options, 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UCaseProps *csp; 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* current-level start/limit - s1/s2 as current */ 656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *start1, *start2, *limit1, *limit2; 657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* case folding variables */ 659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *p; 660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length; 661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* stacks of previous-level start/current/limit */ 663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CmpEquivLevel stack1[2], stack2[2]; 664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* case folding buffers, only use current-level start/limit */ 666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1]; 667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* track which is the current level per string */ 669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t level1, level2; 670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* current code units, and code points for lookups */ 672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c1, c2, cp1, cp2; 673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* no argument error checking because this itself is not an API */ 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * assume that at least the option U_COMPARE_IGNORE_CASE is set 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * otherwise this function would have to behave exactly as uprv_strCompare() 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 68027f654740f2a26ad62a5c155af9199af9e69b889claireho csp=ucase_getSingleton(); 681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*pErrorCode)) { 682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* initialize */ 686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start1=s1; 687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length1==-1) { 688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit1=NULL; 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit1=s1+length1; 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start2=s2; 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length2==-1) { 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit2=NULL; 696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit2=s2+length2; 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru level1=level2=0; 701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c1=c2=-1; 702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* comparison loop */ 704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * here a code unit value of -1 means "get another code unit" 707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * below it will mean "this source is finished" 708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c1<0) { 711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* get next code unit from string 1, post-increment */ 712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) { 714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(level1==0) { 715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c1=-1; 716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++s1; 720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* reached end of level buffer, pop one level */ 724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --level1; 726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start1=stack1[level1].start; 727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while(start1==NULL); 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s1=stack1[level1].s; 729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit1=stack1[level1].limit; 730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c2<0) { 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* get next code unit from string 2, post-increment */ 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) { 737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(level2==0) { 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c2=-1; 739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++s2; 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* reached end of level buffer, pop one level */ 747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --level2; 749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start2=stack2[level2].start; 750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while(start2==NULL); 751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s2=stack2[level2].s; 752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit2=stack2[level2].limit; 753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * compare c1 and c2 758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * either variable c1, c2 is -1 only if the corresponding string is finished 759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c1==c2) { 761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c1<0) { 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; /* c1==c2==-1 indicating end of strings */ 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c1=c2=-1; /* make us fetch new code units */ 765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(c1<0) { 767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; /* string 1 ends before string 2 */ 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(c2<0) { 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 1; /* string 2 ends before string 1 */ 770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* c1!=c2 && c1>=0 && c2>=0 */ 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* get complete code points for c1, c2 for lookups if either is a surrogate */ 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cp1=c1; 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_IS_SURROGATE(c1)) { 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c; 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_IS_SURROGATE_LEAD(c1)) { 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) { 780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* advance ++s1; only below if cp1 decomposes/case-folds */ 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cp1=U16_GET_SUPPLEMENTARY(c1, c); 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* isTrail(c1) */ { 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) { 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cp1=U16_GET_SUPPLEMENTARY(c, c1); 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cp2=c2; 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_IS_SURROGATE(c2)) { 792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c; 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_IS_SURROGATE_LEAD(c2)) { 795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) { 796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* advance ++s2; only below if cp2 decomposes/case-folds */ 797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cp2=U16_GET_SUPPLEMENTARY(c2, c); 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* isTrail(c2) */ { 800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) { 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cp2=U16_GET_SUPPLEMENTARY(c, c2); 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * go down one level for each string 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * continue with the main loop as soon as there is a real change 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( level1==0 && 812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* cp1 case-folds to the code point "length" or to p[length] */ 815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_IS_SURROGATE(c1)) { 816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_IS_SURROGATE_LEAD(c1)) { 817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* advance beyond source surrogate pair if it case-folds */ 818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++s1; 819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* isTrail(c1) */ { 820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * we got a supplementary code point when hitting its trail surrogate, 822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * therefore the lead surrogate must have been the same as in the other string; 823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * compare this decomposition with the lead surrogate in the other string 824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * remember that this simulates bulk text replacement: 825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the decomposition would replace the entire code point 826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --s2; 828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c2=*(s2-1); 829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* push current level pointers */ 833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stack1[0].start=start1; 834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stack1[0].s=s1; 835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stack1[0].limit=limit1; 836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++level1; 837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* copy the folding result to fold1[] */ 839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<=UCASE_MAX_STRING_LENGTH) { 840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_memcpy(fold1, p, length); 841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i=0; 843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_APPEND_UNSAFE(fold1, i, length); 844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=i; 845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* set next level pointers to case folding */ 848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start1=s1=fold1; 849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit1=fold1+length; 850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* get ready to read from decomposition, continue with loop */ 852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c1=-1; 853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( level2==0 && 857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0 858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* cp2 case-folds to the code point "length" or to p[length] */ 860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_IS_SURROGATE(c2)) { 861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_IS_SURROGATE_LEAD(c2)) { 862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* advance beyond source surrogate pair if it case-folds */ 863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++s2; 864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* isTrail(c2) */ { 865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * we got a supplementary code point when hitting its trail surrogate, 867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * therefore the lead surrogate must have been the same as in the other string; 868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * compare this decomposition with the lead surrogate in the other string 869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * remember that this simulates bulk text replacement: 870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the decomposition would replace the entire code point 871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --s1; 873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c1=*(s1-1); 874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* push current level pointers */ 878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stack2[0].start=start2; 879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stack2[0].s=s2; 880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stack2[0].limit=limit2; 881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++level2; 882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* copy the folding result to fold2[] */ 884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<=UCASE_MAX_STRING_LENGTH) { 885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_memcpy(fold2, p, length); 886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i=0; 888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_APPEND_UNSAFE(fold2, i, length); 889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=i; 890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* set next level pointers to case folding */ 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start2=s2=fold2; 894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit2=fold2+length; 895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* get ready to read from decomposition, continue with loop */ 897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c2=-1; 898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * no decomposition/case folding, max level for both sides: 903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * return difference result 904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * code point order comparison must not just return cp1-cp2 906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * because when single surrogates are present then the surrogate pairs 907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * that formed cp1 and cp2 may be from different string indexes 908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units 910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * c1=d800 cp1=10001 c2=dc00 cp2=10000 911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 } 912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * therefore, use same fix-up as in ustring.c/uprv_strCompare() 914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++ 915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * so we have slightly different pointer/start/limit comparisons here 916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) { 919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */ 920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( 921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) || 922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2))) 923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* part of a surrogate pair, leave >=d800 */ 925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* BMP code point - may be surrogate code point - make <d800 */ 927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c1-=0x2800; 928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( 931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) || 932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2))) 933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* part of a surrogate pair, leave >=d800 */ 935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* BMP code point - may be surrogate code point - make <d800 */ 937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c2-=0x2800; 938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return c1-c2; 942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* public API functions */ 946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strCaseCompare(const UChar *s1, int32_t length1, 949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s2, int32_t length2, 950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t options, 951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* argument checking */ 953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pErrorCode==0 || U_FAILURE(*pErrorCode)) { 954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s1==NULL || length1<-1 || s2==NULL || length2<-1) { 957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return u_strcmpFold(s1, length1, s2, length2, 961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru options|U_COMPARE_IGNORE_CASE, 962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options) { 967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return u_strcmpFold(s1, -1, s2, -1, 969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru options|U_COMPARE_IGNORE_CASE, 970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &errorCode); 971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options) { 975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return u_strcmpFold(s1, length, s2, length, 977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru options|U_COMPARE_IGNORE_CASE, 978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &errorCode); 979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) { 983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return u_strcmpFold(s1, n, s2, n, 985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE), 986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &errorCode); 987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 988