/* ustrcase.c revision b0ac937921a2c196d8b9da665135bf6ba01a1ccf */
1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 4b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru* Copyright (C) 2001-2009, International Business Machines 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* file name: ustrcase.c 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* encoding: US-ASCII 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* tab size: 8 (not used) 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* indentation:4 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* created on: 2002feb20 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* created by: Markus W. Scherer 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Implementation file for string casing C API functions. 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Uses functions from uchar.c for basic functionality that requires access 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* to the Unicode Character Database (uprops.dat). 
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uloc.h" 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h" 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucasemap.h" 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ubrk.h" 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucase.h" 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unormimp.h" 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ustr_imp.h" 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* string casing ------------------------------------------------------------ */ 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */ 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic U_INLINE int32_t 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruappendResult(UChar *dest, int32_t destIndex, int32_t destCapacity, 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t result, const UChar *s) { 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length; 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* decode the result */ 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(result<0) { 
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* (not) original code point */ 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=~result; 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=-1; 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(result<=UCASE_MAX_STRING_LENGTH) { 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=U_SENTINEL; 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=result; 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=result; 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=-1; 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(destIndex<destCapacity) { 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* append the result */ 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<0) { 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* code point */ 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isError=FALSE; 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_APPEND(dest, destIndex, destCapacity, c, isError); 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(isError) { 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* overflow, nothing written */ 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+=U16_LENGTH(c); 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* string */ 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((destIndex+length)<=destCapacity) { 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru while(length>0) { 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest[destIndex++]=*s++; 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --length; 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* overflow */ 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+=length; 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* preflight */ 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<0) { 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+=U16_LENGTH(c); 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+=length; 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return destIndex; 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UChar32 U_CALLCONV 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruutf16_caseContextIterator(void *context, int8_t dir) { 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseContext *csc=(UCaseContext *)context; 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(dir<0) { 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* reset for 
backward iteration */ 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->index=csc->cpStart; 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->dir=dir; 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(dir>0) { 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* reset for forward iteration */ 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->index=csc->cpLimit; 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->dir=dir; 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* continue current iteration direction */ 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dir=csc->dir; 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(dir<0) { 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(csc->start<csc->index) { 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_PREV((const UChar *)csc->p, csc->start, csc->index, c); 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return c; 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(csc->index<csc->limit) { 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_NEXT((const UChar *)csc->p, csc->index, csc->limit, c); 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return c; 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return U_SENTINEL; 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 
117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Case-maps [srcStart..srcLimit[ but takes 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * context [0..srcLength[ into account. 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru_caseMap(const UCaseMap *csm, UCaseMapFull *map, 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *dest, int32_t destCapacity, 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, UCaseContext *csc, 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcStart, int32_t srcLimit, 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s; 129b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 c, c2 = 0; 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcIndex, destIndex; 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t locCache; 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru locCache=csm->locCache; 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* case mapping loop */ 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcIndex=srcStart; 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex=0; 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(srcIndex<srcLimit) { 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->cpStart=srcIndex; 
140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_NEXT(src, srcIndex, srcLimit, c); 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->cpLimit=srcIndex; 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=map(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &locCache); 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) { 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fast path version of appendResult() for BMP results */ 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest[destIndex++]=(UChar)c2; 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex=appendResult(dest, destIndex, destCapacity, c, s); 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(destIndex>destCapacity) { 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return destIndex; 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerusetTempCaseMapLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) { 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * We could call ucasemap_setLocale(), but here we really only care about 
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the initial language subtag, we need not return the real string via 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ucasemap_getLocale(), and we don't care about only getting "x" from 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * "x-some-thing" etc. 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * We ignore locales with a longer-than-3 initial subtag. 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * We also do not fill in the locCache because it is rarely used, 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and not worth setting unless we reuse it for many case mapping operations. 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (That's why UCaseMap was created.) 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i; 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char c; 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* the internal functions require locale!=NULL */ 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(locale==NULL) { 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru locale=uloc_getDefault(); 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<4 && (c=locale[i])!=0 && c!='-' && c!='_'; ++i) { 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm->locale[i]=c; 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(i<=3) { 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
csm->locale[i]=0; /* Up to 3 non-separator characters. */ 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm->locale[0]=0; /* Longer-than-3 initial subtag: Ignore. */ 185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Set parameters on an empty UCaseMap, for UCaseMap-less API functions. 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Do this fast because it is called with every function call. 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic U_INLINE void 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerusetTempCaseMap(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) { 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(csm->csp==NULL) { 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm->csp=ucase_getSingleton(pErrorCode); 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*pErrorCode)) { 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(locale!=NULL && locale[0]==0) { 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm->locale[0]=0; 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru setTempCaseMapLocale(csm, locale, pErrorCode); 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Internal titlecasing function. 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru_toTitle(UCaseMap *csm, 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *dest, int32_t destCapacity, 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, UCaseContext *csc, 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcLength, 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s; 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 220b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t prev, titleStart, titleLimit, idx, destIndex, length; 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isFirstIndex; 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(csm->iter!=NULL) { 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ubrk_setText(csm->iter, src, srcLength, pErrorCode); 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm->iter=ubrk_open(UBRK_WORD, csm->locale, 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, srcLength, 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
pErrorCode); 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*pErrorCode)) { 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* set up local variables */ 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex=0; 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prev=0; 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isFirstIndex=TRUE; 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* titlecasing loop */ 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(prev<srcLength) { 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* find next index where to titlecase */ 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(isFirstIndex) { 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isFirstIndex=FALSE; 244b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru idx=ubrk_first(csm->iter); 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 246b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru idx=ubrk_next(csm->iter); 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 248b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(idx==UBRK_DONE || idx>srcLength) { 249b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru idx=srcLength; 250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Unicode 4 & 5 section 3.13 Default Case 
Operations: 254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex 256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * #29, "Text Boundaries." Between each pair of word boundaries, find the first 257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * cased character F. If F exists, map F to default_title(F); then map each 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * subsequent character C to default_lower(C). 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * In this implementation, segment [prev..index[ into 3 parts: 261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a) uncased characters (copy as-is) [prev..titleStart[ 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * b) first case letter (titlecase) [titleStart..titleLimit[ 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * c) subsequent characters (lowercase) [titleLimit..index[ 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 265b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(prev<idx) { 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* find and copy uncased characters [prev..titleStart[ */ 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru titleStart=titleLimit=prev; 268b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U16_NEXT(src, titleLimit, idx, c); 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) { 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Adjust the titlecasing index (titleStart) to the next cased character. 
*/ 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru titleStart=titleLimit; 273b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(titleLimit==idx) { 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * only uncased characters in [prev..index[ 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * stop with titleStart==titleLimit==index 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 280b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U16_NEXT(src, titleLimit, idx, c); 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(UCASE_NONE!=ucase_getType(csm->csp, c)) { 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; /* cased letter at [titleStart..titleLimit[ */ 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=titleStart-prev; 286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length>0) { 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((destIndex+length)<=destCapacity) { 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(dest+destIndex, src+prev, length*U_SIZEOF_UCHAR); 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+=length; 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(titleStart<titleLimit) 
{ 295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* titlecase c which is from [titleStart..titleLimit[ */ 296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->cpStart=titleStart; 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->cpLimit=titleLimit; 298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &csm->locCache); 299c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru destIndex=appendResult(dest, destIndex, destCapacity, c, s); 300c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 301c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* Special case Dutch IJ titlecasing */ 302b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ( titleStart+1 < idx && 303c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucase_getCaseLocale(csm->locale,&csm->locCache) == UCASE_LOC_DUTCH && 304c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ( src[titleStart] == (UChar32) 0x0049 || src[titleStart] == (UChar32) 0x0069 ) && 305c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ( src[titleStart+1] == (UChar32) 0x004A || src[titleStart+1] == (UChar32) 0x006A )) { 306c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c=(UChar32) 0x004A; 307c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru destIndex=appendResult(dest, destIndex, destCapacity, c, s); 308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru titleLimit++; 309c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* lowercase [titleLimit..index[ */ 312b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(titleLimit<idx) { 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) { 
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Normal operation: Lowercase the rest of the word. */ 315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+= 316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _caseMap( 317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm, ucase_toFullLower, 318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest+destIndex, destCapacity-destIndex, 319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, csc, 320b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru titleLimit, idx, 321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Optionally just copy the rest of the word unchanged. */ 324b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru length=idx-titleLimit; 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((destIndex+length)<=destCapacity) { 326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(dest+destIndex, src+titleLimit, length*U_SIZEOF_UCHAR); 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+=length; 329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 334b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru prev=idx; 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(destIndex>destCapacity) { 
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return destIndex; 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* functions available in the common library (for unistr_case.cpp) */ 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruustr_toLower(const UCaseProps *csp, 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *dest, int32_t destCapacity, 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, int32_t srcLength, 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *locale, 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseMap csm={ NULL }; 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseContext csc={ NULL }; 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm.csp=csp; 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru setTempCaseMap(&csm, locale, pErrorCode); 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc.p=(void *)src; 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc.limit=srcLength; 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return _caseMap(&csm, ucase_toFullLower, 
362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest, destCapacity, 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, &csc, 0, srcLength, 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruustr_toUpper(const UCaseProps *csp, 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *dest, int32_t destCapacity, 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, int32_t srcLength, 371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *locale, 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseMap csm={ NULL }; 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseContext csc={ NULL }; 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm.csp=csp; 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru setTempCaseMap(&csm, locale, pErrorCode); 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc.p=(void *)src; 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc.limit=srcLength; 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return _caseMap(&csm, ucase_toFullUpper, 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest, destCapacity, 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, &csc, 0, srcLength, 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 
386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruustr_toTitle(const UCaseProps *csp, 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *dest, int32_t destCapacity, 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, int32_t srcLength, 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBreakIterator *titleIter, 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *locale, uint32_t options, 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseMap csm={ NULL }; 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseContext csc={ NULL }; 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length; 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm.csp=csp; 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm.iter=titleIter; 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm.options=options; 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru setTempCaseMap(&csm, locale, pErrorCode); 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc.p=(void *)src; 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc.limit=srcLength; 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=_toTitle(&csm, 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest, destCapacity, 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
src, &csc, srcLength, 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(titleIter==NULL && csm.iter!=NULL) { 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ubrk_close(csm.iter); 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return length; 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruustr_foldCase(const UCaseProps *csp, 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *dest, int32_t destCapacity, 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, int32_t srcLength, 423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t options, 424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcIndex, destIndex; 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s; 428b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 c, c2 = 0; 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* case mapping loop */ 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcIndex=destIndex=0; 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(srcIndex<srcLength) { 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_NEXT(src, srcIndex, srcLength, c); 
434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=ucase_toFullFolding(csp, c, &s, options); 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) { 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fast path version of appendResult() for BMP results */ 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest[destIndex++]=(UChar)c2; 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex=appendResult(dest, destIndex, destCapacity, c, s); 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(destIndex>destCapacity) { 444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return destIndex; 447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Implement argument checking and buffer handling 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * for string case mapping as a common function. 
452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* common internal function for public API functions */ 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerucaseMap(const UCaseMap *csm, 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *dest, int32_t destCapacity, 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, int32_t srcLength, 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t toWhichCase, 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar buffer[300]; 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *temp; 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destLength; 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* check argument values */ 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( destCapacity<0 || 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (dest==NULL && destCapacity>0) || 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src==NULL || 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcLength<-1 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 
476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* get the string length */ 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(srcLength==-1) { 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcLength=u_strlen(src); 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* check for overlapping source and destination */ 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( dest!=NULL && 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((src>=dest && src<(dest+destCapacity)) || 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (dest>=src && dest<(src+srcLength))) 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* overlap: provide a temporary destination buffer and later copy the result */ 491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(destCapacity<=(sizeof(buffer)/U_SIZEOF_UCHAR)) { 492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* the stack buffer is large enough */ 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru temp=buffer; 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* allocate a buffer */ 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR); 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(temp==NULL) { 
498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru temp=dest; 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destLength=0; 507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(toWhichCase==FOLD_CASE) { 509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destLength=ustr_foldCase(csm->csp, temp, destCapacity, src, srcLength, 510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm->options, pErrorCode); 511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseContext csc={ NULL }; 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc.p=(void *)src; 515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc.limit=srcLength; 516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(toWhichCase==TO_LOWER) { 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destLength=_caseMap(csm, ucase_toFullLower, 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru temp, destCapacity, 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, &csc, 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0, srcLength, 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 
523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(toWhichCase==TO_UPPER) { 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destLength=_caseMap(csm, ucase_toFullUpper, 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru temp, destCapacity, 526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, &csc, 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0, srcLength, 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* if(toWhichCase==TO_TITLE) */ { 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if UCONFIG_NO_BREAK_ITERATION 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_UNSUPPORTED_ERROR; 532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#else 533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* UCaseMap is actually non-const in toTitle() APIs. */ 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destLength=_toTitle((UCaseMap *)csm, temp, destCapacity, 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, &csc, srcLength, 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(temp!=dest) { 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* copy the result string to the destination buffer */ 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(destLength>0) { 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t copyLength= destLength<=destCapacity ? 
destLength : destCapacity; 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(copyLength>0) { 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memmove(dest, temp, copyLength*U_SIZEOF_UCHAR); 546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(temp!=buffer) { 549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(temp); 550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* public API functions */ 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strToLower(UChar *dest, int32_t destCapacity, 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, int32_t srcLength, 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *locale, 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseMap csm={ NULL }; 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru setTempCaseMap(&csm, locale, pErrorCode); 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return caseMap(&csm, 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest, destCapacity, 
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, srcLength, 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TO_LOWER, pErrorCode); 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strToUpper(UChar *dest, int32_t destCapacity, 573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, int32_t srcLength, 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *locale, 575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseMap csm={ NULL }; 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru setTempCaseMap(&csm, locale, pErrorCode); 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return caseMap(&csm, 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest, destCapacity, 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, srcLength, 581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TO_UPPER, pErrorCode); 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strToTitle(UChar *dest, int32_t destCapacity, 588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, int32_t srcLength, 589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBreakIterator *titleIter, 
590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *locale, 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseMap csm={ NULL }; 593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length; 594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm.iter=titleIter; 596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru setTempCaseMap(&csm, locale, pErrorCode); 597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=caseMap(&csm, 598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest, destCapacity, 599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, srcLength, 600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TO_TITLE, pErrorCode); 601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(titleIter==NULL && csm.iter!=NULL) { 602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ubrk_close(csm.iter); 603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return length; 605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucasemap_toTitle(UCaseMap *csm, 609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *dest, int32_t destCapacity, 610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, int32_t srcLength, 611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return caseMap(csm, 613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest, 
destCapacity, 614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, srcLength, 615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TO_TITLE, pErrorCode); 616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strFoldCase(UChar *dest, int32_t destCapacity, 622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, int32_t srcLength, 623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t options, 624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseMap csm={ NULL }; 626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm.csp=ucase_getSingleton(pErrorCode); 627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm.options=options; 628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return caseMap(&csm, 629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest, destCapacity, 630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, srcLength, 631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru FOLD_CASE, pErrorCode); 632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* case-insensitive string comparisons -------------------------------------- */ 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This function is a copy 
of unorm_cmpEquivFold() minus the parts for 638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * canonical equivalence. 639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Keep the functions in sync, and see there for how this works. 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The duplication is for modularization: 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * It makes caseless (but not canonical caseless) matches independent of 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the normalization code. 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* stack element for previous-level source/decomposition pointers */ 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustruct CmpEquivLevel { 647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *start, *s, *limit; 648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutypedef struct CmpEquivLevel CmpEquivLevel; 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* internal function */ 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strcmpFold(const UChar *s1, int32_t length1, 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s2, int32_t length2, 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t options, 656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UCaseProps *csp; 658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* current-level start/limit - s1/s2 as current */ 660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *start1, *start2, *limit1, *limit2; 661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* case folding variables */ 663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *p; 664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length; 665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* stacks of previous-level start/current/limit */ 667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CmpEquivLevel stack1[2], stack2[2]; 668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* case folding buffers, only use current-level start/limit */ 670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1]; 671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* track which is the current level per string */ 673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t level1, level2; 674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* current code units, and code points for lookups */ 676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c1, c2, cp1, cp2; 677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* no argument error checking because this itself is not an API */ 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 
681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * assume that at least the option U_COMPARE_IGNORE_CASE is set 682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * otherwise this function would have to behave exactly as uprv_strCompare() 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csp=ucase_getSingleton(pErrorCode); 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*pErrorCode)) { 686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* initialize */ 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start1=s1; 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length1==-1) { 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit1=NULL; 693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit1=s1+length1; 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start2=s2; 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length2==-1) { 699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit2=NULL; 700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit2=s2+length2; 702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru level1=level2=0; 705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c1=c2=-1; 
706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* comparison loop */ 708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * here a code unit value of -1 means "get another code unit" 711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * below it will mean "this source is finished" 712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c1<0) { 715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* get next code unit from string 1, post-increment */ 716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) { 718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(level1==0) { 719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c1=-1; 720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++s1; 724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* reached end of level buffer, pop one level */ 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --level1; 730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
start1=stack1[level1].start; 731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while(start1==NULL); 732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s1=stack1[level1].s; 733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit1=stack1[level1].limit; 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c2<0) { 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* get next code unit from string 2, post-increment */ 739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) { 741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(level2==0) { 742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c2=-1; 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++s2; 747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* reached end of level buffer, pop one level */ 751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --level2; 753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start2=stack2[level2].start; 754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while(start2==NULL); 
755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s2=stack2[level2].s; 756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit2=stack2[level2].limit; 757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * compare c1 and c2 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * either variable c1, c2 is -1 only if the corresponding string is finished 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c1==c2) { 765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c1<0) { 766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; /* c1==c2==-1 indicating end of strings */ 767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c1=c2=-1; /* make us fetch new code units */ 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(c1<0) { 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; /* string 1 ends before string 2 */ 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(c2<0) { 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 1; /* string 2 ends before string 1 */ 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* c1!=c2 && c1>=0 && c2>=0 */ 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* get complete code points for c1, c2 for lookups if either is a surrogate */ 
778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cp1=c1; 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_IS_SURROGATE(c1)) { 780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c; 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_IS_SURROGATE_LEAD(c1)) { 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) { 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* advance ++s1; only below if cp1 decomposes/case-folds */ 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cp1=U16_GET_SUPPLEMENTARY(c1, c); 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* isTrail(c1) */ { 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) { 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cp1=U16_GET_SUPPLEMENTARY(c, c1); 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cp2=c2; 795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_IS_SURROGATE(c2)) { 796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c; 797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_IS_SURROGATE_LEAD(c2)) { 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) { 800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* advance ++s2; only below if cp2 decomposes/case-folds */ 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru cp2=U16_GET_SUPPLEMENTARY(c2, c); 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* isTrail(c2) */ { 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) { 805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cp2=U16_GET_SUPPLEMENTARY(c, c2); 806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * go down one level for each string 812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * continue with the main loop as soon as there is a real change 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( level1==0 && 816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0 817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* cp1 case-folds to the code point "length" or to p[length] */ 819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_IS_SURROGATE(c1)) { 820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_IS_SURROGATE_LEAD(c1)) { 821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* advance beyond source surrogate pair if it case-folds */ 822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++s1; 823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* isTrail(c1) */ { 
824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * we got a supplementary code point when hitting its trail surrogate, 826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * therefore the lead surrogate must have been the same as in the other string; 827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * compare this decomposition with the lead surrogate in the other string 828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * remember that this simulates bulk text replacement: 829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the decomposition would replace the entire code point 830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --s2; 832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c2=*(s2-1); 833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* push current level pointers */ 837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stack1[0].start=start1; 838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stack1[0].s=s1; 839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stack1[0].limit=limit1; 840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++level1; 841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* copy the folding result to fold1[] */ 843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<=UCASE_MAX_STRING_LENGTH) { 844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_memcpy(fold1, p, length); 845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 
846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i=0; 847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_APPEND_UNSAFE(fold1, i, length); 848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=i; 849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* set next level pointers to case folding */ 852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start1=s1=fold1; 853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit1=fold1+length; 854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* get ready to read from decomposition, continue with loop */ 856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c1=-1; 857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( level2==0 && 861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0 862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* cp2 case-folds to the code point "length" or to p[length] */ 864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_IS_SURROGATE(c2)) { 865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_IS_SURROGATE_LEAD(c2)) { 866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* advance beyond source surrogate pair if it case-folds */ 867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++s2; 868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* isTrail(c2) */ { 
869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * we got a supplementary code point when hitting its trail surrogate, 871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * therefore the lead surrogate must have been the same as in the other string; 872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * compare this decomposition with the lead surrogate in the other string 873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * remember that this simulates bulk text replacement: 874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the decomposition would replace the entire code point 875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --s1; 877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c1=*(s1-1); 878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* push current level pointers */ 882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stack2[0].start=start2; 883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stack2[0].s=s2; 884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stack2[0].limit=limit2; 885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++level2; 886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* copy the folding result to fold2[] */ 888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<=UCASE_MAX_STRING_LENGTH) { 889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_memcpy(fold2, p, length); 890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 
891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i=0; 892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_APPEND_UNSAFE(fold2, i, length); 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=i; 894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* set next level pointers to case folding */ 897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start2=s2=fold2; 898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit2=fold2+length; 899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* get ready to read from decomposition, continue with loop */ 901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c2=-1; 902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * no decomposition/case folding, max level for both sides: 907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * return difference result 908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * code point order comparison must not just return cp1-cp2 910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * because when single surrogates are present then the surrogate pairs 911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * that formed cp1 and cp2 may be from different string indexes 912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * example: { d800 d800 dc01 } vs. 
{ d800 dc00 }, compare at second code units 914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * c1=d800 cp1=10001 c2=dc00 cp2=10000 915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 } 916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * therefore, use same fix-up as in ustring.c/uprv_strCompare() 918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++ 919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * so we have slightly different pointer/start/limit comparisons here 920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) { 923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */ 924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( 925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) || 926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2))) 927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* part of a surrogate pair, leave >=d800 */ 929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* BMP code point - may be surrogate code point - make <d800 */ 931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c1-=0x2800; 932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( 935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) || 936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2))) 937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* part of a surrogate pair, leave >=d800 */ 939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* BMP code point - may be surrogate code point - make <d800 */ 941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c2-=0x2800; 942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return c1-c2; 946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* public API functions */ 950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strCaseCompare(const UChar *s1, int32_t length1, 953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s2, int32_t length2, 954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t options, 955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* argument checking */ 
957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pErrorCode==0 || U_FAILURE(*pErrorCode)) { 958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s1==NULL || length1<-1 || s2==NULL || length2<-1) { 961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return u_strcmpFold(s1, length1, s2, length2, 965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru options|U_COMPARE_IGNORE_CASE, 966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options) { 971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return u_strcmpFold(s1, -1, s2, -1, 973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru options|U_COMPARE_IGNORE_CASE, 974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &errorCode); 975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options) { 
979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return u_strcmpFold(s1, length, s2, length, 981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru options|U_COMPARE_IGNORE_CASE, 982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &errorCode); 983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) { 987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return u_strcmpFold(s1, n, s2, n, 989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE), 990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &errorCode); 991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 992