1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* Copyright (C) 2001-2011, International Business Machines 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* file name: ustrcase.cpp 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* encoding: US-ASCII 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* tab size: 8 (not used) 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* indentation:4 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* created on: 2002feb20 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* created by: Markus W. Scherer 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Implementation file for string casing C API functions. 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Uses functions from uchar.c for basic functionality that requires access 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* to the Unicode Character Database (uprops.dat). 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 2283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/brkiter.h" 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h" 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucasemap.h" 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ubrk.h" 2683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/utf.h" 2783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/utf16.h" 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucase.h" 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ustr_imp.h" 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 3383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 3483a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusU_NAMESPACE_USE 3583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* string casing ------------------------------------------------------------ */ 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 3883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius/* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. */ 3983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusstatic inline int32_t 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruappendResult(UChar *dest, int32_t destIndex, int32_t destCapacity, 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t result, const UChar *s) { 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length; 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* decode the result */ 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(result<0) { 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* (not) original code point */ 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=~result; 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=-1; 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(result<=UCASE_MAX_STRING_LENGTH) { 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=U_SENTINEL; 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=result; 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=result; 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=-1; 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(destIndex<destCapacity) { 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* append the result */ 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<0) { 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* code point */ 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isError=FALSE; 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_APPEND(dest, destIndex, destCapacity, c, isError); 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(isError) { 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* overflow, nothing written */ 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+=U16_LENGTH(c); 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* string */ 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((destIndex+length)<=destCapacity) { 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(length>0) { 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest[destIndex++]=*s++; 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --length; 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* overflow */ 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+=length; 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* preflight */ 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<0) { 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+=U16_LENGTH(c); 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+=length; 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return destIndex; 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UChar32 U_CALLCONV 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruutf16_caseContextIterator(void *context, int8_t dir) { 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseContext *csc=(UCaseContext *)context; 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(dir<0) { 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* reset for backward iteration */ 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->index=csc->cpStart; 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->dir=dir; 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(dir>0) { 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* reset for forward iteration */ 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->index=csc->cpLimit; 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->dir=dir; 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* continue current iteration direction */ 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dir=csc->dir; 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(dir<0) { 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(csc->start<csc->index) { 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_PREV((const UChar *)csc->p, csc->start, csc->index, c); 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return c; 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(csc->index<csc->limit) { 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_NEXT((const UChar *)csc->p, csc->index, csc->limit, c); 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return c; 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return U_SENTINEL; 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Case-maps [srcStart..srcLimit[ but takes 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * context [0..srcLength[ into account. 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru_caseMap(const UCaseMap *csm, UCaseMapFull *map, 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *dest, int32_t destCapacity, 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, UCaseContext *csc, 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcStart, int32_t srcLimit, 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s; 134b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 c, c2 = 0; 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcIndex, destIndex; 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t locCache; 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru locCache=csm->locCache; 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* case mapping loop */ 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcIndex=srcStart; 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex=0; 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(srcIndex<srcLimit) { 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->cpStart=srcIndex; 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_NEXT(src, srcIndex, srcLimit, c); 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->cpLimit=srcIndex; 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=map(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &locCache); 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) { 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fast path version of appendResult() for BMP results */ 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest[destIndex++]=(UChar)c2; 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex=appendResult(dest, destIndex, destCapacity, c, s); 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(destIndex>destCapacity) { 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return destIndex; 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 16483a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusU_CFUNC int32_t U_CALLCONV 16583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusustrcase_internalToTitle(const UCaseMap *csm, 16683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UChar *dest, int32_t destCapacity, 16783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const UChar *src, int32_t srcLength, 16883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UErrorCode *pErrorCode) { 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s; 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 171b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t prev, titleStart, titleLimit, idx, destIndex, length; 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isFirstIndex; 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*pErrorCode)) { 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 17883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // Use the C++ abstract base class to minimize dependencies. 17983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // TODO: Change UCaseMap.iter to store a BreakIterator directly. 18083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius BreakIterator *bi=reinterpret_cast<BreakIterator *>(csm->iter); 18183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* set up local variables */ 18383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius int32_t locCache=csm->locCache; 18483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UCaseContext csc=UCASECONTEXT_INITIALIZER; 18583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius csc.p=(void *)src; 18683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius csc.limit=srcLength; 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex=0; 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prev=0; 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isFirstIndex=TRUE; 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* titlecasing loop */ 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(prev<srcLength) { 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* find next index where to titlecase */ 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(isFirstIndex) { 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isFirstIndex=FALSE; 19683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius idx=bi->first(); 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 19883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius idx=bi->next(); 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 200b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(idx==UBRK_DONE || idx>srcLength) { 201b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru idx=srcLength; 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Unicode 4 & 5 section 3.13 Default Case Operations: 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * #29, "Text Boundaries." Between each pair of word boundaries, find the first 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * cased character F. If F exists, map F to default_title(F); then map each 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * subsequent character C to default_lower(C). 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * In this implementation, segment [prev..index[ into 3 parts: 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a) uncased characters (copy as-is) [prev..titleStart[ 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * b) first case letter (titlecase) [titleStart..titleLimit[ 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * c) subsequent characters (lowercase) [titleLimit..index[ 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 217b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(prev<idx) { 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* find and copy uncased characters [prev..titleStart[ */ 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru titleStart=titleLimit=prev; 220b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U16_NEXT(src, titleLimit, idx, c); 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) { 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Adjust the titlecasing index (titleStart) to the next cased character. */ 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru titleStart=titleLimit; 225b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(titleLimit==idx) { 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * only uncased characters in [prev..index[ 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * stop with titleStart==titleLimit==index 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 232b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U16_NEXT(src, titleLimit, idx, c); 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(UCASE_NONE!=ucase_getType(csm->csp, c)) { 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; /* cased letter at [titleStart..titleLimit[ */ 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=titleStart-prev; 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length>0) { 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((destIndex+length)<=destCapacity) { 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(dest+destIndex, src+prev, length*U_SIZEOF_UCHAR); 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+=length; 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(titleStart<titleLimit) { 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* titlecase c which is from [titleStart..titleLimit[ */ 24883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius csc.cpStart=titleStart; 24983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius csc.cpLimit=titleLimit; 25083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, &csc, &s, csm->locale, &locCache); 251c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru destIndex=appendResult(dest, destIndex, destCapacity, c, s); 252c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 253c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* Special case Dutch IJ titlecasing */ 254b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ( titleStart+1 < idx && 25583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius ucase_getCaseLocale(csm->locale,&locCache) == UCASE_LOC_DUTCH && 256c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ( src[titleStart] == (UChar32) 0x0049 || src[titleStart] == (UChar32) 0x0069 ) && 257c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ( src[titleStart+1] == (UChar32) 0x004A || src[titleStart+1] == (UChar32) 0x006A )) { 258c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c=(UChar32) 0x004A; 259c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru destIndex=appendResult(dest, destIndex, destCapacity, c, s); 260c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru titleLimit++; 261c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* lowercase [titleLimit..index[ */ 264b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(titleLimit<idx) { 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) { 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Normal operation: Lowercase the rest of the word. */ 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+= 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _caseMap( 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm, ucase_toFullLower, 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest+destIndex, destCapacity-destIndex, 27183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius src, &csc, 272b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru titleLimit, idx, 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Optionally just copy the rest of the word unchanged. */ 276b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru length=idx-titleLimit; 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((destIndex+length)<=destCapacity) { 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(dest+destIndex, src+titleLimit, length*U_SIZEOF_UCHAR); 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+=length; 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 286b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru prev=idx; 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(destIndex>destCapacity) { 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return destIndex; 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 29583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#endif // !UCONFIG_NO_BREAK_ITERATION 296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* functions available in the common library (for unistr_case.cpp) */ 298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 29983a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusU_CFUNC int32_t U_CALLCONV 30083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusustrcase_internalToLower(const UCaseMap *csm, 30183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UChar *dest, int32_t destCapacity, 30283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const UChar *src, int32_t srcLength, 30383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UErrorCode *pErrorCode) { 30483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UCaseContext csc=UCASECONTEXT_INITIALIZER; 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc.p=(void *)src; 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc.limit=srcLength; 30783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return _caseMap( 30883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius csm, ucase_toFullLower, 30983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius dest, destCapacity, 31083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius src, &csc, 0, srcLength, 31183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius pErrorCode); 312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 31483a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusU_CFUNC int32_t U_CALLCONV 31583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusustrcase_internalToUpper(const UCaseMap *csm, 31683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UChar *dest, int32_t destCapacity, 31783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const UChar *src, int32_t srcLength, 31883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UErrorCode *pErrorCode) { 31983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UCaseContext csc=UCASECONTEXT_INITIALIZER; 320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc.p=(void *)src; 321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc.limit=srcLength; 32283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return _caseMap( 32383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius csm, ucase_toFullUpper, 32483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius dest, destCapacity, 32583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius src, &csc, 0, srcLength, 32683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius pErrorCode); 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 32983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusstatic int32_t 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruustr_foldCase(const UCaseProps *csp, 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *dest, int32_t destCapacity, 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, int32_t srcLength, 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t options, 334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcIndex, destIndex; 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s; 338b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 c, c2 = 0; 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* case mapping loop */ 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcIndex=destIndex=0; 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(srcIndex<srcLength) { 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_NEXT(src, srcIndex, srcLength, c); 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=ucase_toFullFolding(csp, c, &s, options); 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) { 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fast path version of appendResult() for BMP results */ 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest[destIndex++]=(UChar)c2; 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex=appendResult(dest, destIndex, destCapacity, c, s); 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(destIndex>destCapacity) { 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return destIndex; 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 35983a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusU_CFUNC int32_t U_CALLCONV 36083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusustrcase_internalFold(const UCaseMap *csm, 36183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UChar *dest, int32_t destCapacity, 36283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const UChar *src, int32_t srcLength, 36383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UErrorCode *pErrorCode) { 36483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return ustr_foldCase(csm->csp, dest, destCapacity, src, srcLength, csm->options, pErrorCode); 36583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius} 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 36783a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusU_CFUNC int32_t 36883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusustrcase_map(const UCaseMap *csm, 36983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UChar *dest, int32_t destCapacity, 37083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const UChar *src, int32_t srcLength, 37183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UStringCaseMapper *stringCaseMapper, 37283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UErrorCode *pErrorCode) { 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar buffer[300]; 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *temp; 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destLength; 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* check argument values */ 37983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(U_FAILURE(*pErrorCode)) { 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( destCapacity<0 || 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (dest==NULL && destCapacity>0) || 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src==NULL || 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcLength<-1 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* get the string length */ 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(srcLength==-1) { 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcLength=u_strlen(src); 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* check for overlapping source and destination */ 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( dest!=NULL && 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((src>=dest && src<(dest+destCapacity)) || 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (dest>=src && dest<(src+srcLength))) 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* overlap: provide a temporary destination buffer and later copy the result */ 40283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(destCapacity<=LENGTHOF(buffer)) { 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* the stack buffer is large enough */ 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru temp=buffer; 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* allocate a buffer */ 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR); 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(temp==NULL) { 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru temp=dest; 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 41783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius destLength=stringCaseMapper(csm, temp, destCapacity, src, srcLength, pErrorCode); 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(temp!=dest) { 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* copy the result string to the destination buffer */ 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(destLength>0) { 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t copyLength= destLength<=destCapacity ? destLength : destCapacity; 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(copyLength>0) { 423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memmove(dest, temp, copyLength*U_SIZEOF_UCHAR); 424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(temp!=buffer) { 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(temp); 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* public API functions */ 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strFoldCase(UChar *dest, int32_t destCapacity, 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *src, int32_t srcLength, 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t options, 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 44183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UCaseMap csm=UCASEMAP_INITIALIZER; 44227f654740f2a26ad62a5c155af9199af9e69b889claireho csm.csp=ucase_getSingleton(); 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm.options=options; 44483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return ustrcase_map( 44583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius &csm, 44683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius dest, destCapacity, 44783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius src, srcLength, 44883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius ustrcase_internalFold, pErrorCode); 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* case-insensitive string comparisons -------------------------------------- */ 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This function is a copy of unorm_cmpEquivFold() minus the parts for 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * canonical equivalence. 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Keep the functions in sync, and see there for how this works. 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The duplication is for modularization: 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * It makes caseless (but not canonical caseless) matches independent of 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the normalization code. 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* stack element for previous-level source/decomposition pointers */ 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustruct CmpEquivLevel { 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *start, *s, *limit; 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querutypedef struct CmpEquivLevel CmpEquivLevel; 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* internal function */ 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC int32_t 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strcmpFold(const UChar *s1, int32_t length1, 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s2, int32_t length2, 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t options, 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UCaseProps *csp; 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* current-level start/limit - s1/s2 as current */ 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *start1, *start2, *limit1, *limit2; 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* case folding variables */ 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *p; 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length; 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* stacks of previous-level start/current/limit */ 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CmpEquivLevel stack1[2], stack2[2]; 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* case folding buffers, only use current-level start/limit */ 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1]; 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* track which is the current level per string */ 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t level1, level2; 491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* current code units, and code points for lookups */ 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c1, c2, cp1, cp2; 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* no argument error checking because this itself is not an API */ 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * assume that at least the option U_COMPARE_IGNORE_CASE is set 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * otherwise this function would have to behave exactly as uprv_strCompare() 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 50127f654740f2a26ad62a5c155af9199af9e69b889claireho csp=ucase_getSingleton(); 502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*pErrorCode)) { 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* initialize */ 507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start1=s1; 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length1==-1) { 509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit1=NULL; 510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit1=s1+length1; 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start2=s2; 515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length2==-1) { 516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit2=NULL; 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit2=s2+length2; 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru level1=level2=0; 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c1=c2=-1; 523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* comparison loop */ 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * here a code unit value of -1 means "get another code unit" 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * below it will mean "this source is finished" 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c1<0) { 532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* get next code unit from string 1, post-increment */ 533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) { 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(level1==0) { 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c1=-1; 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++s1; 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* reached end of level buffer, pop one level */ 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --level1; 54783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius start1=stack1[level1].start; /*Not uninitialized*/ 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while(start1==NULL); 54983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius s1=stack1[level1].s; /*Not uninitialized*/ 55083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius limit1=stack1[level1].limit; /*Not uninitialized*/ 551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c2<0) { 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* get next code unit from string 2, post-increment */ 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) { 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(level2==0) { 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c2=-1; 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++s2; 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* reached end of level buffer, pop one level */ 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --level2; 57083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius start2=stack2[level2].start; /*Not uninitialized*/ 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while(start2==NULL); 57283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius s2=stack2[level2].s; /*Not uninitialized*/ 57383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius limit2=stack2[level2].limit; /*Not uninitialized*/ 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * compare c1 and c2 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * either variable c1, c2 is -1 only if the corresponding string is finished 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c1==c2) { 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c1<0) { 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; /* c1==c2==-1 indicating end of strings */ 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c1=c2=-1; /* make us fetch new code units */ 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(c1<0) { 588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; /* string 1 ends before string 2 */ 589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(c2<0) { 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 1; /* string 2 ends before string 1 */ 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* c1!=c2 && c1>=0 && c2>=0 */ 593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* get complete code points for c1, c2 for lookups if either is a surrogate */ 595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cp1=c1; 596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_IS_SURROGATE(c1)) { 597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c; 598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_IS_SURROGATE_LEAD(c1)) { 600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) { 601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* advance ++s1; only below if cp1 decomposes/case-folds */ 602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cp1=U16_GET_SUPPLEMENTARY(c1, c); 603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* isTrail(c1) */ { 605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) { 606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cp1=U16_GET_SUPPLEMENTARY(c, c1); 607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cp2=c2; 612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_IS_SURROGATE(c2)) { 613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c; 614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_IS_SURROGATE_LEAD(c2)) { 616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) { 617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* advance ++s2; only below if cp2 decomposes/case-folds */ 618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cp2=U16_GET_SUPPLEMENTARY(c2, c); 619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* isTrail(c2) */ { 621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) { 622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cp2=U16_GET_SUPPLEMENTARY(c, c2); 623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * go down one level for each string 629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * continue with the main loop as soon as there is a real change 630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( level1==0 && 633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (length=ucase_toFullFolding(csp, (UChar32)cp1, &p, options))>=0 634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* cp1 case-folds to the code point "length" or to p[length] */ 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_IS_SURROGATE(c1)) { 637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_IS_SURROGATE_LEAD(c1)) { 638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* advance beyond source surrogate pair if it case-folds */ 639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++s1; 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* isTrail(c1) */ { 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * we got a supplementary code point when hitting its trail surrogate, 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * therefore the lead surrogate must have been the same as in the other string; 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * compare this decomposition with the lead surrogate in the other string 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * remember that this simulates bulk text replacement: 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the decomposition would replace the entire code point 647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --s2; 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c2=*(s2-1); 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* push current level pointers */ 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stack1[0].start=start1; 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stack1[0].s=s1; 656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stack1[0].limit=limit1; 657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++level1; 658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* copy the folding result to fold1[] */ 660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<=UCASE_MAX_STRING_LENGTH) { 661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_memcpy(fold1, p, length); 662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i=0; 664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_APPEND_UNSAFE(fold1, i, length); 665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=i; 666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* set next level pointers to case folding */ 669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start1=s1=fold1; 670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit1=fold1+length; 671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* get ready to read from decomposition, continue with loop */ 673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c1=-1; 674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( level2==0 && 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (length=ucase_toFullFolding(csp, (UChar32)cp2, &p, options))>=0 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* cp2 case-folds to the code point "length" or to p[length] */ 681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_IS_SURROGATE(c2)) { 682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_IS_SURROGATE_LEAD(c2)) { 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* advance beyond source surrogate pair if it case-folds */ 684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++s2; 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* isTrail(c2) */ { 686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * we got a supplementary code point when hitting its trail surrogate, 688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * therefore the lead surrogate must have been the same as in the other string; 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * compare this decomposition with the lead surrogate in the other string 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * remember that this simulates bulk text replacement: 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the decomposition would replace the entire code point 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --s1; 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c1=*(s1-1); 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* push current level pointers */ 699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stack2[0].start=start2; 700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stack2[0].s=s2; 701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stack2[0].limit=limit2; 702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++level2; 703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* copy the folding result to fold2[] */ 705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<=UCASE_MAX_STRING_LENGTH) { 706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_memcpy(fold2, p, length); 707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i=0; 709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_APPEND_UNSAFE(fold2, i, length); 710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=i; 711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* set next level pointers to case folding */ 714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start2=s2=fold2; 715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit2=fold2+length; 716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* get ready to read from decomposition, continue with loop */ 718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c2=-1; 719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * no decomposition/case folding, max level for both sides: 724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * return difference result 725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * code point order comparison must not just return cp1-cp2 727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * because when single surrogates are present then the surrogate pairs 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * that formed cp1 and cp2 may be from different string indexes 729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units 731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * c1=d800 cp1=10001 c2=dc00 cp2=10000 732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 } 733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * therefore, use same fix-up as in ustring.c/uprv_strCompare() 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++ 736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * so we have slightly different pointer/start/limit comparisons here 737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) { 740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */ 741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( 742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) || 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2))) 744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* part of a surrogate pair, leave >=d800 */ 746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* BMP code point - may be surrogate code point - make <d800 */ 748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c1-=0x2800; 749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( 752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) || 753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2))) 754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* part of a surrogate pair, leave >=d800 */ 756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* BMP code point - may be surrogate code point - make <d800 */ 758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c2-=0x2800; 759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return c1-c2; 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* public API functions */ 767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strCaseCompare(const UChar *s1, int32_t length1, 770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s2, int32_t length2, 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t options, 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* argument checking */ 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pErrorCode==0 || U_FAILURE(*pErrorCode)) { 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s1==NULL || length1<-1 || s2==NULL || length2<-1) { 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return u_strcmpFold(s1, length1, s2, length2, 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru options|U_COMPARE_IGNORE_CASE, 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options) { 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return u_strcmpFold(s1, -1, s2, -1, 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru options|U_COMPARE_IGNORE_CASE, 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &errorCode); 792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options) { 796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return u_strcmpFold(s1, length, s2, length, 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru options|U_COMPARE_IGNORE_CASE, 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &errorCode); 800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) { 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return u_strcmpFold(s1, n, s2, n, 806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE), 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &errorCode); 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 809