ucasemap.c revision 27f654740f2a26ad62a5c155af9199af9e69b889
1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 427f654740f2a26ad62a5c155af9199af9e69b889claireho* Copyright (C) 2005-2010, International Business Machines 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* file name: ucasemap.c 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* encoding: US-ASCII 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* tab size: 8 (not used) 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* indentation:4 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* created on: 2005may06 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* created by: Markus W. Scherer 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Case mapping service object and functions using it. 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uloc.h" 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h" 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucasemap.h" 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ubrk.h" 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utext.h" 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h" 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucase.h" 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ustr_imp.h" 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* UCaseMap service object -------------------------------------------------- */ 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UCaseMap * U_EXPORT2 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) { 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseMap *csm; 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*pErrorCode)) { 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm=(UCaseMap *)uprv_malloc(sizeof(UCaseMap)); 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(csm==NULL) { 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memset(csm, 0, sizeof(UCaseMap)); 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4827f654740f2a26ad62a5c155af9199af9e69b889claireho csm->csp=ucase_getSingleton(); 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucasemap_setLocale(csm, locale, pErrorCode); 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*pErrorCode)) { 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(csm); 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm->options=options; 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return csm; 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucasemap_close(UCaseMap *csm) { 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(csm!=NULL) { 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ubrk_close(csm->iter); 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(csm); 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI const char * U_EXPORT2 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucasemap_getLocale(const UCaseMap *csm) { 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return csm->locale; 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI uint32_t U_EXPORT2 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucasemap_getOptions(const UCaseMap *csm) { 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return csm->options; 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) { 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length; 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*pErrorCode)) { 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode); 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || length==sizeof(csm->locale)) { 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_ZERO_ERROR; 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* we only really need the language code for case mappings */ 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=uloc_getLanguage(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode); 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length==sizeof(csm->locale)) { 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm->locCache=0; 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_SUCCESS(*pErrorCode)) { 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucase_getCaseLocale(csm->locale, &csm->locCache); 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm->locale[0]=0; 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode) { 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm->options=options; 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI const UBreakIterator * U_EXPORT2 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucasemap_getBreakIterator(const UCaseMap *csm) { 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return csm->iter; 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI void U_EXPORT2 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode) { 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ubrk_close(csm->iter); 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm->iter=iterToAdopt; 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* UTF-8 string case mappings ----------------------------------------------- */ 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* TODO(markus): Move to a new, separate utf8case.c file. */ 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */ 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic U_INLINE int32_t 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruappendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity, 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t result, const UChar *s) { 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length, destLength; 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode; 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* decode the result */ 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(result<0) { 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* (not) original code point */ 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=~result; 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=-1; 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(result<=UCASE_MAX_STRING_LENGTH) { 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=U_SENTINEL; 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=result; 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=result; 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=-1; 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(destIndex<destCapacity) { 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* append the result */ 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<0) { 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* code point */ 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isError=FALSE; 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U8_APPEND(dest, destIndex, destCapacity, c, isError); 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(isError) { 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* overflow, nothing written */ 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+=U8_LENGTH(c); 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* string */ 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_strToUTF8( 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (char *)(dest+destIndex), destCapacity-destIndex, &destLength, 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s, length, 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &errorCode); 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+=destLength; 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* we might have an overflow, but we know the actual length */ 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* preflight */ 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length<0) { 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+=U8_LENGTH(c); 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_strToUTF8( 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru NULL, 0, &destLength, 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s, length, 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &errorCode); 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+=destLength; 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return destIndex; 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UChar32 U_CALLCONV 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruutf8_caseContextIterator(void *context, int8_t dir) { 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseContext *csc=(UCaseContext *)context; 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(dir<0) { 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* reset for backward iteration */ 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->index=csc->cpStart; 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->dir=dir; 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(dir>0) { 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* reset for forward iteration */ 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->index=csc->cpLimit; 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->dir=dir; 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* continue current iteration direction */ 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dir=csc->dir; 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(dir<0) { 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(csc->start<csc->index) { 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U8_PREV((const uint8_t *)csc->p, csc->start, csc->index, c); 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return c; 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(csc->index<csc->limit) { 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U8_NEXT((const uint8_t *)csc->p, csc->index, csc->limit, c); 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return c; 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return U_SENTINEL; 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Case-maps [srcStart..srcLimit[ but takes 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * context [0..srcLength[ into account. 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru_caseMap(const UCaseMap *csm, UCaseMapFull *map, 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *dest, int32_t destCapacity, 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *src, UCaseContext *csc, 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcStart, int32_t srcLimit, 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s; 228b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UChar32 c, c2 = 0; 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcIndex, destIndex; 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t locCache; 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru locCache=csm->locCache; 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* case mapping loop */ 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcIndex=srcStart; 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex=0; 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(srcIndex<srcLimit) { 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->cpStart=srcIndex; 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U8_NEXT(src, srcIndex, srcLimit, c); 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->cpLimit=srcIndex; 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c<0) { 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i=csc->cpStart; 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(destIndex<destCapacity && i<srcIndex) { 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest[destIndex++]=src[i++]; 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=map(csm->csp, c, utf8_caseContextIterator, csc, &s, csm->locale, &locCache); 249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) { 250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fast path version of appendResult() for ASCII results */ 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest[destIndex++]=(uint8_t)c2; 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex=appendResult(dest, destIndex, destCapacity, c, s); 254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(destIndex>destCapacity) { 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return destIndex; 261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Internal titlecasing function. 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru_toTitle(UCaseMap *csm, 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *dest, int32_t destCapacity, 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *src, UCaseContext *csc, 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcLength, 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UText utext=UTEXT_INITIALIZER; 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s; 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 277b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru int32_t prev, titleStart, titleLimit, idx, destIndex, length; 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isFirstIndex; 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode); 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*pErrorCode)) { 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(csm->iter==NULL) { 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm->iter=ubrk_open(UBRK_WORD, csm->locale, 286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru NULL, 0, 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ubrk_setUText(csm->iter, &utext, pErrorCode); 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*pErrorCode)) { 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru utext_close(&utext); 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* set up local variables */ 296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex=0; 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prev=0; 298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isFirstIndex=TRUE; 299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* titlecasing loop */ 301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(prev<srcLength) { 302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* find next index where to titlecase */ 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(isFirstIndex) { 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isFirstIndex=FALSE; 305b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru idx=ubrk_first(csm->iter); 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 307b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru idx=ubrk_next(csm->iter); 308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 309b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(idx==UBRK_DONE || idx>srcLength) { 310b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru idx=srcLength; 311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Unicode 4 & 5 section 3.13 Default Case Operations: 315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex 317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * #29, "Text Boundaries." Between each pair of word boundaries, find the first 318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * cased character F. If F exists, map F to default_title(F); then map each 319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * subsequent character C to default_lower(C). 320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * In this implementation, segment [prev..index[ into 3 parts: 322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a) uncased characters (copy as-is) [prev..titleStart[ 323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * b) first case letter (titlecase) [titleStart..titleLimit[ 324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * c) subsequent characters (lowercase) [titleLimit..index[ 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 326b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(prev<idx) { 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* find and copy uncased characters [prev..titleStart[ */ 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru titleStart=titleLimit=prev; 329b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U8_NEXT(src, titleLimit, idx, c); 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) { 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Adjust the titlecasing index (titleStart) to the next cased character. */ 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru titleStart=titleLimit; 334b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(titleLimit==idx) { 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * only uncased characters in [prev..index[ 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * stop with titleStart==titleLimit==index 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 341b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru U8_NEXT(src, titleLimit, idx, c); 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(UCASE_NONE!=ucase_getType(csm->csp, c)) { 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; /* cased letter at [titleStart..titleLimit[ */ 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=titleStart-prev; 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(length>0) { 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((destIndex+length)<=destCapacity) { 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(dest+destIndex, src+prev, length); 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+=length; 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(titleStart<titleLimit) { 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* titlecase c which is from [titleStart..titleLimit[ */ 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->cpStart=titleStart; 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc->cpLimit=titleLimit; 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=ucase_toFullTitle(csm->csp, c, utf8_caseContextIterator, csc, &s, csm->locale, &csm->locCache); 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex=appendResult(dest, destIndex, destCapacity, c, s); 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 362c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 363c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* Special case Dutch IJ titlecasing */ 364b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if ( titleStart+1 < idx && 365c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ucase_getCaseLocale(csm->locale,&csm->locCache) == UCASE_LOC_DUTCH && 366c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ( src[titleStart] == 0x0049 || src[titleStart] == 0x0069 ) && 367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru ( src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A )) { 368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru c=0x004A; 369c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru destIndex=appendResult(dest, destIndex, destCapacity, c, s); 370c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru titleLimit++; 371c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* lowercase [titleLimit..index[ */ 373b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru if(titleLimit<idx) { 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) { 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Normal operation: Lowercase the rest of the word. */ 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+= 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _caseMap( 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm, ucase_toFullLower, 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest+destIndex, destCapacity-destIndex, 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, csc, 381b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru titleLimit, idx, 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Optionally just copy the rest of the word unchanged. */ 385b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru length=idx-titleLimit; 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((destIndex+length)<=destCapacity) { 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_memcpy(dest+destIndex, src+titleLimit, length); 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex+=length; 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 395b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru prev=idx; 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(destIndex>destCapacity) { 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru utext_close(&utext); 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return destIndex; 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruutf8_foldCase(const UCaseProps *csp, 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *dest, int32_t destCapacity, 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *src, int32_t srcLength, 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t options, 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcIndex, destIndex; 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *s; 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c, c2; 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t start; 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* case mapping loop */ 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcIndex=destIndex=0; 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(srcIndex<srcLength) { 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru start=srcIndex; 423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U8_NEXT(src, srcIndex, srcLength, c); 424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c<0) { 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(destIndex<destCapacity && start<srcIndex) { 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest[destIndex++]=src[start++]; 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=ucase_toFullFolding(csp, c, &s, options); 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) { 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* fast path version of appendResult() for ASCII results */ 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest[destIndex++]=(uint8_t)c2; 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destIndex=appendResult(dest, destIndex, destCapacity, c, s); 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(destIndex>destCapacity) { 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return destIndex; 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Implement argument checking and buffer handling 447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * for string case mapping as a common function. 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* common internal function for public API functions */ 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerucaseMap(const UCaseMap *csm, 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint8_t *dest, int32_t destCapacity, 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const uint8_t *src, int32_t srcLength, 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t toWhichCase, 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t destLength; 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* check argument values */ 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*pErrorCode)) { 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( destCapacity<0 || 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (dest==NULL && destCapacity>0) || 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src==NULL || 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru srcLength<-1 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* get the string length */ 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(srcLength==-1) { 47550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho srcLength=(int32_t)uprv_strlen((const char *)src); 476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* check for overlapping source and destination */ 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( dest!=NULL && 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((src>=dest && src<(dest+destCapacity)) || 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (dest>=src && dest<(src+srcLength))) 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destLength=0; 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(toWhichCase==FOLD_CASE) { 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destLength=utf8_foldCase(csm->csp, dest, destCapacity, src, srcLength, 491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csm->options, pErrorCode); 492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCaseContext csc={ NULL }; 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc.p=(void *)src; 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru csc.limit=srcLength; 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(toWhichCase==TO_LOWER) { 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destLength=_caseMap(csm, ucase_toFullLower, 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest, destCapacity, 501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, &csc, 502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0, srcLength, 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(toWhichCase==TO_UPPER) { 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru destLength=_caseMap(csm, ucase_toFullUpper, 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest, destCapacity, 507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, &csc, 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0, srcLength, 509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* if(toWhichCase==TO_TITLE) */ { 511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if UCONFIG_NO_BREAK_ITERATION 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_UNSUPPORTED_ERROR; 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#else 514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* UCaseMap is actually non-const in toTitle() APIs. */ 515b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru UCaseMap *tmp = (UCaseMap *)csm; 516b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru destLength=_toTitle(tmp, dest, destCapacity, 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru src, &csc, srcLength, 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pErrorCode); 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return u_terminateChars((char *)dest, destCapacity, destLength, pErrorCode); 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* public API functions */ 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucasemap_utf8ToLower(const UCaseMap *csm, 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *dest, int32_t destCapacity, 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *src, int32_t srcLength, 532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return caseMap(csm, 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (uint8_t *)dest, destCapacity, 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (const uint8_t *)src, srcLength, 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TO_LOWER, pErrorCode); 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucasemap_utf8ToUpper(const UCaseMap *csm, 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *dest, int32_t destCapacity, 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *src, int32_t srcLength, 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return caseMap(csm, 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (uint8_t *)dest, destCapacity, 546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (const uint8_t *)src, srcLength, 547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TO_UPPER, pErrorCode); 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_BREAK_ITERATION 551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucasemap_utf8ToTitle(UCaseMap *csm, 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *dest, int32_t destCapacity, 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *src, int32_t srcLength, 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return caseMap(csm, 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (uint8_t *)dest, destCapacity, 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (const uint8_t *)src, srcLength, 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TO_TITLE, pErrorCode); 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruucasemap_utf8FoldCase(const UCaseMap *csm, 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *dest, int32_t destCapacity, 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *src, int32_t srcLength, 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return caseMap(csm, 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (uint8_t *)dest, destCapacity, 572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (const uint8_t *)src, srcLength, 573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru FOLD_CASE, pErrorCode); 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 575