16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org******************************************************************************* 36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Copyright (C) 2004-2012, International Business Machines 56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Corporation and others. All Rights Reserved. 66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org******************************************************************************* 86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* file name: ucase.cpp 96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* encoding: US-ASCII 106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* tab size: 8 (not used) 116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* indentation:4 126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* created on: 2004aug30 146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* created by: Markus W. Scherer 156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Low-level Unicode character/string case mapping code. 176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Much code moved here (and modified) from uchar.c. 186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/ 196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h" 216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/unistr.h" 226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uset.h" 236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/udata.h" /* UDataInfo */ 246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf16.h" 256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ucmndata.h" /* DataHeader */ 266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "udatamem.h" 276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "umutex.h" 286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uassert.h" 296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cmemory.h" 306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "utrie2.h" 316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ucase.h" 326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ucln_cmn.h" 336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstruct UCaseProps { 356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UDataMemory *mem; 366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const int32_t *indexes; 376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *exceptions; 386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *unfold; 396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UTrie2 trie; 416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t formatVersion[4]; 426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* ucase_props_data.h is machine-generated by gencase --csource */ 456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define INCLUDED_FROM_UCASE_CPP 466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ucase_props_data.h" 476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* UCaseProps singleton ----------------------------------------------------- */ 496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI const UCaseProps * U_EXPORT2 516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucase_getSingleton() { 526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return &ucase_props_singleton; 536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* set of property starts for UnicodeSet ------------------------------------ */ 566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool U_CALLCONV 586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, uint32_t /*value*/) { 596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* add the start code point to the USet */ 606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const USetAdder *sa=(const USetAdder *)context; 616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sa->add(sa->set, start); 626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC void U_EXPORT2 666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucase_addPropertyStarts(const UCaseProps *csp, const USetAdder *sa, UErrorCode *pErrorCode) { 676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*pErrorCode)) { 686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* add the start code point of each same-value range of the trie */ 726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utrie2_enum(&csp->trie, NULL, _enumPropertyStartsRange, sa); 736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* add code points with hardcoded properties, plus the ones following them */ 756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* (none right now, see comment below) */ 776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Omit code points with hardcoded specialcasing properties 806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * because we do not build property UnicodeSets for them right now. 816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* data access primitives --------------------------------------------------- */ 856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT)) 876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define PROPS_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION) 896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* number of bits in an 8-bit integer value */ 916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const uint8_t flagsOffset[256]={ 926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define HAS_SLOT(flags, idx) ((flags)&(1<<(idx))) 1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define SLOT_OFFSET(flags, idx) flagsOffset[(flags)&((1<<(idx))-1)] 1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Get the value of an optional-value slot where HAS_SLOT(excWord, idx). 1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param excWord (in) initial exceptions word 1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param idx (in) desired slot index 1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param pExc16 (in/out) const uint16_t * after excWord=*pExc16++; 1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * moved to the last uint16_t of the value, use +1 for beginning of next slot 1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param value (out) int32_t or uint32_t output if hasSlot, otherwise not modified 1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define GET_SLOT_VALUE(excWord, idx, pExc16, value) \ 1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(((excWord)&UCASE_EXC_DOUBLE_SLOTS)==0) { \ 1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (pExc16)+=SLOT_OFFSET(excWord, idx); \ 1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (value)=*pExc16; \ 1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { \ 1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (pExc16)+=2*SLOT_OFFSET(excWord, idx); \ 1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (value)=*pExc16++; \ 1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (value)=((value)<<16)|*pExc16; \ 1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* simple case mappings ----------------------------------------------------- */ 1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar32 U_EXPORT2 1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucase_tolower(const UCaseProps *csp, UChar32 c) { 1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t props=UTRIE2_GET16(&csp->trie, c); 1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!PROPS_HAS_EXCEPTION(props)) { 1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { 1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c+=UCASE_GET_DELTA(props); 1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *pe=GET_EXCEPTIONS(csp, props); 1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t excWord=*pe++; 1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) { 1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe, c); 1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return c; 1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar32 U_EXPORT2 1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucase_toupper(const UCaseProps *csp, UChar32 c) { 1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t props=UTRIE2_GET16(&csp->trie, c); 1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!PROPS_HAS_EXCEPTION(props)) { 1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(UCASE_GET_TYPE(props)==UCASE_LOWER) { 1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c+=UCASE_GET_DELTA(props); 1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *pe=GET_EXCEPTIONS(csp, props); 1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t excWord=*pe++; 1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) { 1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org GET_SLOT_VALUE(excWord, UCASE_EXC_UPPER, pe, c); 1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return c; 1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar32 U_EXPORT2 1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucase_totitle(const UCaseProps *csp, UChar32 c) { 1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t props=UTRIE2_GET16(&csp->trie, c); 1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!PROPS_HAS_EXCEPTION(props)) { 1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(UCASE_GET_TYPE(props)==UCASE_LOWER) { 1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c+=UCASE_GET_DELTA(props); 1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *pe=GET_EXCEPTIONS(csp, props); 1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t excWord=*pe++; 1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t idx; 1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(HAS_SLOT(excWord, UCASE_EXC_TITLE)) { 1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org idx=UCASE_EXC_TITLE; 1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) { 1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org idx=UCASE_EXC_UPPER; 1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return c; 1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org GET_SLOT_VALUE(excWord, idx, pe, c); 1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return c; 1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar iDot[2] = { 0x69, 0x307 }; 1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar jDot[2] = { 0x6a, 0x307 }; 1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar iOgonekDot[3] = { 0x12f, 0x307 }; 1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar iDotGrave[3] = { 0x69, 0x307, 0x300 }; 1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar iDotAcute[3] = { 0x69, 0x307, 0x301 }; 1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar iDotTilde[3] = { 0x69, 0x307, 0x303 }; 1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC void U_EXPORT2 2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa) { 2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t props; 2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Hardcode the case closure of i and its relatives and ignore the 2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * data file data for these characters. 2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The Turkic dotless i and dotted I with their case mapping conditions 2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and case folding option make the related characters behave specially. 2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This code matches their closure behavior to their case folding behavior. 2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch(c) { 2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 0x49: 2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* regular i and I are in one equivalence class */ 2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sa->add(sa->set, 0x69); 2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 0x69: 2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sa->add(sa->set, 0x49); 2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 0x130: 2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */ 2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sa->addString(sa->set, iDot, 2); 2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 0x131: 2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* dotless i is in a class by itself */ 2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: 2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* otherwise use the data file data */ 2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org props=UTRIE2_GET16(&csp->trie, c); 2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!PROPS_HAS_EXCEPTION(props)) { 2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(UCASE_GET_TYPE(props)!=UCASE_NONE) { 2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* add the one simple case mapping, no matter what type it is */ 2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t delta=UCASE_GET_DELTA(props); 2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(delta!=0) { 2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sa->add(sa->set, c+delta); 2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * c has exceptions, so there may be multiple simple and/or 2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * full case mappings. Add them all. 2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *pe0, *pe=GET_EXCEPTIONS(csp, props); 2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *closure; 2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t excWord=*pe++; 2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t idx, closureLength, fullLength, length; 2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pe0=pe; 2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* add all simple case mappings */ 2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) { 2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(HAS_SLOT(excWord, idx)) { 2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pe=pe0; 2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org GET_SLOT_VALUE(excWord, idx, pe, c); 2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sa->add(sa->set, c); 2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* get the closure string pointer & length */ 2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(HAS_SLOT(excWord, UCASE_EXC_CLOSURE)) { 2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pe=pe0; 2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org GET_SLOT_VALUE(excWord, UCASE_EXC_CLOSURE, pe, closureLength); 2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org closureLength&=UCASE_CLOSURE_MAX_LENGTH; /* higher bits are reserved */ 2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org closure=(const UChar *)pe+1; /* behind this slot, unless there are full case mappings */ 2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org closureLength=0; 2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org closure=NULL; 2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* add the full case folding */ 2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) { 2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pe=pe0; 2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, fullLength); 2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* start of full case mapping strings */ 2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++pe; 2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fullLength&=0xffff; /* bits 16 and higher are reserved */ 2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* skip the lowercase result string */ 2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pe+=fullLength&UCASE_FULL_LOWER; 2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fullLength>>=4; 2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* add the full case folding string */ 2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=fullLength&0xf; 2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length!=0) { 2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sa->addString(sa->set, (const UChar *)pe, length); 2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pe+=length; 2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* skip the uppercase and titlecase strings */ 2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fullLength>>=4; 2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pe+=fullLength&0xf; 2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fullLength>>=4; 2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pe+=fullLength; 2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org closure=(const UChar *)pe; /* behind full case mappings */ 3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* add each code point in the closure string */ 3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(idx=0; idx<closureLength;) { 3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT_UNSAFE(closure, idx, c); 3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sa->add(sa->set, c); 3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * compare s, which has a length, with t, which has a maximum length or is NUL-terminated 3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * must be length>0 and max>0 and length<=max 3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic inline int32_t 3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstrcmpMax(const UChar *s, int32_t length, const UChar *t, int32_t max) { 3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t c1, c2; 3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org max-=length; /* we require length<=max, so no need to decrement max in the loop */ 3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org do { 3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c1=*s++; 3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c2=*t++; 3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c2==0) { 3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 1; /* reached the end of t but not of s */ 3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c1-=c2; 3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c1!=0) { 3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return c1; /* return difference result */ 3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } while(--length>0); 3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* ends with length==0 */ 3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(max==0 || *t==0) { 3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; /* equal to length of both strings */ 3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return -max; /* return lengh difference */ 3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC UBool U_EXPORT2 3406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length, const USetAdder *sa) { 3416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i, start, limit, result, unfoldRows, unfoldRowWidth, unfoldStringWidth; 3426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(csp->unfold==NULL || s==NULL) { 3446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; /* no reverse case folding data, or no string */ 3456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length<=1) { 3476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* the string is too short to find any match */ 3486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 3496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * more precise would be: 3506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * if(!u_strHasMoreChar32Than(s, length, 1)) 3516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * but this does not make much practical difference because 3526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * a single supplementary code point would just not be found 3536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 3546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 3556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *unfold=csp->unfold; 3586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org unfoldRows=unfold[UCASE_UNFOLD_ROWS]; 3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org unfoldRowWidth=unfold[UCASE_UNFOLD_ROW_WIDTH]; 3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org unfoldStringWidth=unfold[UCASE_UNFOLD_STRING_WIDTH]; 3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org unfold+=unfoldRowWidth; 3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length>unfoldStringWidth) { 3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* the string is too long to find any match */ 3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* do a binary search for the string */ 3696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org start=0; 3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit=unfoldRows; 3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(start<limit) { 3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org i=(start+limit)/2; 3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *p=reinterpret_cast<const UChar *>(unfold+(i*unfoldRowWidth)); 3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result=strcmpMax(s, length, p, unfoldStringWidth); 3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(result==0) { 3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* found the string: add each code point, and its case closure */ 3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(i=unfoldStringWidth; i<unfoldRowWidth && p[i]!=0;) { 3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT_UNSAFE(p, i, c); 3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sa->add(sa->set, c); 3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucase_addCaseClosure(csp, c, sa); 3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(result<0) { 3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit=i; 3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else /* result>0 */ { 3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org start=i+1; 3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; /* string not found */ 3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_BEGIN 3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgFullCaseFoldingIterator::FullCaseFoldingIterator() 3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org : unfold(reinterpret_cast<const UChar *>(ucase_props_singleton.unfold)), 4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org unfoldRows(unfold[UCASE_UNFOLD_ROWS]), 4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org unfoldRowWidth(unfold[UCASE_UNFOLD_ROW_WIDTH]), 4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org unfoldStringWidth(unfold[UCASE_UNFOLD_STRING_WIDTH]), 4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org currentRow(0), 4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org rowCpIndex(unfoldStringWidth) { 4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org unfold+=unfoldRowWidth; 4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUChar32 4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgFullCaseFoldingIterator::next(UnicodeString &full) { 4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Advance past the last-delivered code point. 4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *p=unfold+(currentRow*unfoldRowWidth); 4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(rowCpIndex>=unfoldRowWidth || p[rowCpIndex]==0) { 4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++currentRow; 4146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org p+=unfoldRowWidth; 4156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org rowCpIndex=unfoldStringWidth; 4166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(currentRow>=unfoldRows) { return U_SENTINEL; } 4186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Set "full" to the NUL-terminated string in the first unfold column. 4196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t length=unfoldStringWidth; 4206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(length>0 && p[length-1]==0) { --length; } 4216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org full.setTo(FALSE, p, length); 4226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Return the code point. 4236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 4246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_NEXT_UNSAFE(p, rowCpIndex, c); 4256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return c; 4266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_END 4296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */ 4316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 4326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucase_getType(const UCaseProps *csp, UChar32 c) { 4336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t props=UTRIE2_GET16(&csp->trie, c); 4346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return UCASE_GET_TYPE(props); 4356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** @return same as ucase_getType() and set bit 2 if c is case-ignorable */ 4386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 4396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucase_getTypeOrIgnorable(const UCaseProps *csp, UChar32 c) { 4406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t props=UTRIE2_GET16(&csp->trie, c); 4416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return UCASE_GET_TYPE_AND_IGNORABLE(props); 4426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** @return UCASE_NO_DOT, UCASE_SOFT_DOTTED, UCASE_ABOVE, UCASE_OTHER_ACCENT */ 4456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic inline int32_t 4466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orggetDotType(const UCaseProps *csp, UChar32 c) { 4476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t props=UTRIE2_GET16(&csp->trie, c); 4486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!PROPS_HAS_EXCEPTION(props)) { 4496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return props&UCASE_DOT_MASK; 4506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 4516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *pe=GET_EXCEPTIONS(csp, props); 4526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (*pe>>UCASE_EXC_DOT_SHIFT)&UCASE_DOT_MASK; 4536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UBool U_EXPORT2 4576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucase_isSoftDotted(const UCaseProps *csp, UChar32 c) { 4586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (UBool)(getDotType(csp, c)==UCASE_SOFT_DOTTED); 4596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UBool U_EXPORT2 4626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucase_isCaseSensitive(const UCaseProps *csp, UChar32 c) { 4636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t props=UTRIE2_GET16(&csp->trie, c); 4646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (UBool)((props&UCASE_SENSITIVE)!=0); 4656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* string casing ------------------------------------------------------------ */ 4686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 4706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * These internal functions form the core of string case mappings. 4716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * They map single code points to result code points or strings and take 4726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * all necessary conditions (context, locale ID, options) into account. 4736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 4746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * They do not iterate over the source or write to the destination 4756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * so that the same functions are useful for non-standard string storage, 4766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * such as in a Replaceable (for Transliterator) or UTF-8/32 strings etc. 4776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * For the same reason, the "surrounding text" context is passed in as a 4786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * UCaseContextIterator which does not make any assumptions about 4796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the underlying storage. 4806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 4816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This section contains helper functions that check for conditions 4826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * in the input text surrounding the current code point 4836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * according to SpecialCasing.txt. 4846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 4856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Each helper function gets the index 4866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - after the current code point if it looks at following text 4876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - before the current code point if it looks at preceding text 4886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 4896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Unicode 3.2 UAX 21 "Case Mappings" defines the conditions as follows: 4906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 4916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Final_Sigma 4926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * C is preceded by a sequence consisting of 4936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * a cased letter and a case-ignorable sequence, 4946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and C is not followed by a sequence consisting of 4956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * an ignorable sequence and then a cased letter. 4966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 4976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * More_Above 4986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * C is followed by one or more characters of combining class 230 (ABOVE) 4996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * in the combining character sequence. 5006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 5016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * After_Soft_Dotted 5026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The last preceding character with combining class of zero before C 5036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * was Soft_Dotted, 5046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and there is no intervening combining character class 230 (ABOVE). 5056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 5066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Before_Dot 5076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * C is followed by combining dot above (U+0307). 5086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Any sequence of characters with a combining class that is neither 0 nor 230 5096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * may intervene between the current character and the combining dot above. 5106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 5116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The erratum from 2002-10-31 adds the condition 5126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 5136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * After_I 5146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The last preceding base character was an uppercase I, and there is no 5156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * intervening combining character class 230 (ABOVE). 5166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 5176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * (See Jitterbug 2344 and the comments on After_I below.) 5186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 5196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Helper definitions in Unicode 3.2 UAX 21: 5206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 5216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * D1. A character C is defined to be cased 5226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * if it meets any of the following criteria: 5236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 5246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - The general category of C is Titlecase Letter (Lt) 5256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - In [CoreProps], C has one of the properties Uppercase, or Lowercase 5266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - Given D = NFD(C), then it is not the case that: 5276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * D = UCD_lower(D) = UCD_upper(D) = UCD_title(D) 5286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * (This third criterium does not add any characters to the list 5296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * for Unicode 3.2. Ignored.) 5306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 5316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * D2. A character C is defined to be case-ignorable 5326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * if it meets either of the following criteria: 5336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 5346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - The general category of C is 5356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or 5366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Letter Modifier (Lm), or Symbol Modifier (Sk) 5376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - C is one of the following characters 5386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * U+0027 APOSTROPHE 5396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * U+00AD SOFT HYPHEN (SHY) 5406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * U+2019 RIGHT SINGLE QUOTATION MARK 5416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * (the preferred character for apostrophe) 5426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 5436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * D3. A case-ignorable sequence is a sequence of 5446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * zero or more case-ignorable characters. 5456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 5466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define is_a(c) ((c)=='a' || (c)=='A') 5486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define is_d(c) ((c)=='d' || (c)=='D') 5496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define is_e(c) ((c)=='e' || (c)=='E') 5506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define is_i(c) ((c)=='i' || (c)=='I') 5516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define is_l(c) ((c)=='l' || (c)=='L') 5526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define is_n(c) ((c)=='n' || (c)=='N') 5536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define is_r(c) ((c)=='r' || (c)=='R') 5546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define is_t(c) ((c)=='t' || (c)=='T') 5556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define is_u(c) ((c)=='u' || (c)=='U') 5566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define is_z(c) ((c)=='z' || (c)=='Z') 5576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* separator? */ 5596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define is_sep(c) ((c)=='_' || (c)=='-' || (c)==0) 5606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 5626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Requires non-NULL locale ID but otherwise does the equivalent of 5636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * checking for language codes as if uloc_getLanguage() were called: 5646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Accepts both 2- and 3-letter codes and accepts case variants. 5656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 5666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC int32_t 5676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucase_getCaseLocale(const char *locale, int32_t *locCache) { 5686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t result; 5696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char c; 5706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(locCache!=NULL && (result=*locCache)!=UCASE_LOC_UNKNOWN) { 5726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return result; 5736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result=UCASE_LOC_ROOT; 5766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 5786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This function used to use uloc_getLanguage(), but the current code 5796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * removes the dependency of this low-level code on uloc implementation code 5806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and is faster because not the whole locale ID has to be 5816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * examined and copied/transformed. 5826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 5836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Because this code does not want to depend on uloc, the caller must 5846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * pass in a non-NULL locale, i.e., may need to call uloc_getDefault(). 5856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 5866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=*locale++; 5876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(is_t(c)) { 5886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* tr or tur? */ 5896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=*locale++; 5906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(is_u(c)) { 5916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=*locale++; 5926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(is_r(c)) { 5946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=*locale; 5956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(is_sep(c)) { 5966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result=UCASE_LOC_TURKISH; 5976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(is_a(c)) { 6006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* az or aze? */ 6016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=*locale++; 6026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(is_z(c)) { 6036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=*locale++; 6046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(is_e(c)) { 6056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=*locale; 6066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(is_sep(c)) { 6086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result=UCASE_LOC_TURKISH; 6096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(is_l(c)) { 6126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* lt or lit? */ 6136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=*locale++; 6146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(is_i(c)) { 6156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=*locale++; 6166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(is_t(c)) { 6186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=*locale; 6196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(is_sep(c)) { 6206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result=UCASE_LOC_LITHUANIAN; 6216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(is_n(c)) { 6246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* nl or nld? */ 6256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=*locale++; 6266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(is_l(c)) { 6276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=*locale++; 6286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(is_d(c)) { 6296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=*locale; 6306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(is_sep(c)) { 6326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result=UCASE_LOC_DUTCH; 6336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(locCache!=NULL) { 6386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *locCache=result; 6396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return result; 6416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 6426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 6446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Is followed by 6456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * {case-ignorable}* cased 6466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * ? 6476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * (dir determines looking forward/backward) 6486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * If a character is case-ignorable, it is skipped regardless of whether 6496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * it is also cased or not. 6506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 6516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool 6526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgisFollowedByCasedLetter(const UCaseProps *csp, UCaseContextIterator *iter, void *context, int8_t dir) { 6536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 6546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(iter==NULL) { 6566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 6576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(/* dir!=0 sets direction */; (c=iter(context, dir))>=0; dir=0) { 6606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t type=ucase_getTypeOrIgnorable(csp, c); 6616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(type&4) { 6626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* case-ignorable, continue with the loop */ 6636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(type!=UCASE_NONE) { 6646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; /* followed by cased letter */ 6656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 6666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; /* uncased and not case-ignorable */ 6676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; /* not followed by cased letter */ 6716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 6726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Is preceded by Soft_Dotted character with no intervening cc=230 ? */ 6746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool 6756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgisPrecededBySoftDotted(const UCaseProps *csp, UCaseContextIterator *iter, void *context) { 6766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 6776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t dotType; 6786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int8_t dir; 6796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(iter==NULL) { 6816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 6826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(dir=-1; (c=iter(context, dir))>=0; dir=0) { 6856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dotType=getDotType(csp, c); 6866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(dotType==UCASE_SOFT_DOTTED) { 6876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; /* preceded by TYPE_i */ 6886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(dotType!=UCASE_OTHER_ACCENT) { 6896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; /* preceded by different base character (not TYPE_i), or intervening cc==230 */ 6906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; /* not preceded by TYPE_i */ 6946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 6956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 6976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * See Jitterbug 2344: 6986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The condition After_I for Turkic-lowercasing of U+0307 combining dot above 6996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * is checked in ICU 2.0, 2.1, 2.6 but was not in 2.2 & 2.4 because 7006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * we made those releases compatible with Unicode 3.2 which had not fixed 7016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * a related bug in SpecialCasing.txt. 7026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 7036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * From the Jitterbug 2344 text: 7046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * ... this bug is listed as a Unicode erratum 7056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * from 2002-10-31 at http://www.unicode.org/uni2errata/UnicodeErrata.html 7066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <quote> 7076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * There are two errors in SpecialCasing.txt. 7086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1. Missing semicolons on two lines. ... [irrelevant for ICU] 7096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2. An incorrect context definition. Correct as follows: 7106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * < 0307; ; 0307; 0307; tr After_Soft_Dotted; # COMBINING DOT ABOVE 7116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * < 0307; ; 0307; 0307; az After_Soft_Dotted; # COMBINING DOT ABOVE 7126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * --- 7136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * > 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE 7146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * > 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE 7156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * where the context After_I is defined as: 7166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The last preceding base character was an uppercase I, and there is no 7176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * intervening combining character class 230 (ABOVE). 7186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * </quote> 7196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 7206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Note that SpecialCasing.txt even in Unicode 3.2 described the condition as: 7216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 7226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. 7236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * # This matches the behavior of the canonically equivalent I-dot_above 7246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 7256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * See also the description in this place in older versions of uchar.c (revision 1.100). 7266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 7276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Markus W. Scherer 2003-feb-15 7286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 7296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Is preceded by base character 'I' with no intervening cc=230 ? */ 7316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool 7326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgisPrecededBy_I(const UCaseProps *csp, UCaseContextIterator *iter, void *context) { 7336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 7346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t dotType; 7356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int8_t dir; 7366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(iter==NULL) { 7386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 7396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(dir=-1; (c=iter(context, dir))>=0; dir=0) { 7426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c==0x49) { 7436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; /* preceded by I */ 7446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dotType=getDotType(csp, c); 7466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(dotType!=UCASE_OTHER_ACCENT) { 7476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; /* preceded by different base character (not I), or intervening cc==230 */ 7486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; /* not preceded by I */ 7526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 7536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Is followed by one or more cc==230 ? */ 7556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool 7566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgisFollowedByMoreAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *context) { 7576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 7586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t dotType; 7596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int8_t dir; 7606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(iter==NULL) { 7626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 7636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(dir=1; (c=iter(context, dir))>=0; dir=0) { 7666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dotType=getDotType(csp, c); 7676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(dotType==UCASE_ABOVE) { 7686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; /* at least one cc==230 following */ 7696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(dotType!=UCASE_OTHER_ACCENT) { 7706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; /* next base character, no more cc==230 following */ 7716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; /* no more cc==230 following */ 7756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 7766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Is followed by a dot above (without cc==230 in between) ? */ 7786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool 7796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgisFollowedByDotAbove(const UCaseProps *csp, UCaseContextIterator *iter, void *context) { 7806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 7816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t dotType; 7826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int8_t dir; 7836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(iter==NULL) { 7856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 7866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(dir=1; (c=iter(context, dir))>=0; dir=0) { 7896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c==0x307) { 7906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 7916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dotType=getDotType(csp, c); 7936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(dotType!=UCASE_OTHER_ACCENT) { 7946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; /* next base character or cc==230 in between */ 7956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; /* no dot above following */ 7996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 8006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 8026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucase_toFullLower(const UCaseProps *csp, UChar32 c, 8036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UCaseContextIterator *iter, void *context, 8046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar **pString, 8056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char *locale, int32_t *locCache) 8066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 8076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 result=c; 8086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t props=UTRIE2_GET16(&csp->trie, c); 8096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!PROPS_HAS_EXCEPTION(props)) { 8106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { 8116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result=c+UCASE_GET_DELTA(props); 8126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 8146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *pe=GET_EXCEPTIONS(csp, props), *pe2; 8156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t excWord=*pe++; 8166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t full; 8176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pe2=pe; 8196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) { 8216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* use hardcoded conditions and mappings */ 8226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t loc=ucase_getCaseLocale(locale, locCache); 8236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 8256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Test for conditional mappings first 8266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * (otherwise the unconditional default mappings are always taken), 8276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * then test for characters that have unconditional mappings in SpecialCasing.txt, 8286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * then get the UnicodeData.txt mappings. 8296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 8306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( loc==UCASE_LOC_LITHUANIAN && 8316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* base characters, find accents above */ 8326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (((c==0x49 || c==0x4a || c==0x12e) && 8336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isFollowedByMoreAbove(csp, iter, context)) || 8346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* precomposed with accent above, no need to find one */ 8356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (c==0xcc || c==0xcd || c==0x128)) 8366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 8376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 8386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org # Lithuanian 8396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org # Lithuanian retains the dot in a lowercase i when followed by accents. 8416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org # Introduce an explicit dot above when lowercasing capital I's and J's 8436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org # whenever there are more accents above. 8446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek) 8456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I 8476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J 8486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK 8496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE 8506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE 8516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE 8526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 8536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch(c) { 8546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 0x49: /* LATIN CAPITAL LETTER I */ 8556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pString=iDot; 8566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 2; 8576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 0x4a: /* LATIN CAPITAL LETTER J */ 8586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pString=jDot; 8596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 2; 8606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */ 8616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pString=iOgonekDot; 8626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 2; 8636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 0xcc: /* LATIN CAPITAL LETTER I WITH GRAVE */ 8646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pString=iDotGrave; 8656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 3; 8666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 0xcd: /* LATIN CAPITAL LETTER I WITH ACUTE */ 8676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pString=iDotAcute; 8686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 3; 8696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */ 8706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pString=iDotTilde; 8716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 3; 8726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: 8736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; /* will not occur */ 8746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 8756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* # Turkish and Azeri */ 8766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(loc==UCASE_LOC_TURKISH && c==0x130) { 8776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 8786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri 8796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org # The following rules handle those cases. 8806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0130; 0069; 0130; 0130; tr # LATIN CAPITAL LETTER I WITH DOT ABOVE 8826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE 8836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 8846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0x69; 8856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(loc==UCASE_LOC_TURKISH && c==0x307 && isPrecededBy_I(csp, iter, context)) { 8866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 8876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. 8886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org # This matches the behavior of the canonically equivalent I-dot_above 8896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE 8916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE 8926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 8936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; /* remove the dot (continue without output) */ 8946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(csp, iter, context)) { 8956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 8966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org # When lowercasing, unless an I is before a dot_above, it turns into a dotless i. 8976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 8986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I 8996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I 9006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 9016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0x131; 9026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(c==0x130) { 9036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 9046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org # Preserve canonical equivalence for I with dot. Turkic is handled below. 9056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE 9076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 9086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pString=iDot; 9096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 2; 9106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if( c==0x3a3 && 9116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org !isFollowedByCasedLetter(csp, iter, context, 1) && 9126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isFollowedByCasedLetter(csp, iter, context, -1) /* -1=preceded */ 9136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 9146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) */ 9156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 9166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org # Special case for final form of sigma 9176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA 9196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 9206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0x3c2; /* greek small final sigma */ 9216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 9226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* no known conditional special case mapping, use a normal mapping */ 9236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) { 9256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full); 9266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org full&=UCASE_FULL_LOWER; 9276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(full!=0) { 9286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set the output pointer to the lowercase mapping */ 9296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pString=reinterpret_cast<const UChar *>(pe+1); 9306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* return the string length */ 9326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return full; 9336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) { 9376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe2, result); 9386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (result==c) ? ~result : result; 9426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 9436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* internal */ 9456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int32_t 9466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgtoUpperOrTitle(const UCaseProps *csp, UChar32 c, 9476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UCaseContextIterator *iter, void *context, 9486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar **pString, 9496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char *locale, int32_t *locCache, 9506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool upperNotTitle) { 9516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 result=c; 9526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t props=UTRIE2_GET16(&csp->trie, c); 9536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!PROPS_HAS_EXCEPTION(props)) { 9546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(UCASE_GET_TYPE(props)==UCASE_LOWER) { 9556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result=c+UCASE_GET_DELTA(props); 9566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 9586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *pe=GET_EXCEPTIONS(csp, props), *pe2; 9596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t excWord=*pe++; 9606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t full, idx; 9616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pe2=pe; 9636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) { 9656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* use hardcoded conditions and mappings */ 9666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t loc=ucase_getCaseLocale(locale, locCache); 9676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(loc==UCASE_LOC_TURKISH && c==0x69) { 9696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 9706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org # Turkish and Azeri 9716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri 9736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org # The following rules handle those cases. 9746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org # When uppercasing, i turns into a dotted capital I 9766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I 9786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I 9796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 9806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0x130; 9816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(loc==UCASE_LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(csp, iter, context)) { 9826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 9836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org # Lithuanian 9846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org # Lithuanian retains the dot in a lowercase i when followed by accents. 9866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org # Remove DOT ABOVE after "i" with upper or titlecase 9886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE 9906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 9916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; /* remove the dot (continue without output) */ 9926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 9936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* no known conditional special case mapping, use a normal mapping */ 9946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) { 9966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full); 9976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* start of full case mapping strings */ 9996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++pe; 10006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* skip the lowercase and case-folding result strings */ 10026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pe+=full&UCASE_FULL_LOWER; 10036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org full>>=4; 10046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pe+=full&0xf; 10056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org full>>=4; 10066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(upperNotTitle) { 10086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org full&=0xf; 10096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 10106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* skip the uppercase result string */ 10116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pe+=full&0xf; 10126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org full=(full>>4)&0xf; 10136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(full!=0) { 10166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set the output pointer to the result string */ 10176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pString=reinterpret_cast<const UChar *>(pe); 10186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* return the string length */ 10206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return full; 10216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!upperNotTitle && HAS_SLOT(excWord, UCASE_EXC_TITLE)) { 10256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org idx=UCASE_EXC_TITLE; 10266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) { 10276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* here, titlecase is same as uppercase */ 10286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org idx=UCASE_EXC_UPPER; 10296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 10306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return ~c; 10316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org GET_SLOT_VALUE(excWord, idx, pe2, result); 10336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (result==c) ? ~result : result; 10366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 10376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 10396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucase_toFullUpper(const UCaseProps *csp, UChar32 c, 10406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UCaseContextIterator *iter, void *context, 10416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar **pString, 10426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char *locale, int32_t *locCache) { 10436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return toUpperOrTitle(csp, c, iter, context, pString, locale, locCache, TRUE); 10446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 10456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 10476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucase_toFullTitle(const UCaseProps *csp, UChar32 c, 10486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UCaseContextIterator *iter, void *context, 10496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar **pString, 10506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char *locale, int32_t *locCache) { 10516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return toUpperOrTitle(csp, c, iter, context, pString, locale, locCache, FALSE); 10526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 10536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* case folding ------------------------------------------------------------- */ 10556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 10576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Case folding is similar to lowercasing. 10586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The result may be a simple mapping, i.e., a single code point, or 10596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * a full mapping, i.e., a string. 10606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * If the case folding for a code point is the same as its simple (1:1) lowercase mapping, 10616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * then only the lowercase mapping is stored. 10626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 10636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Some special cases are hardcoded because their conditions cannot be 10646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * parsed and processed from CaseFolding.txt. 10656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 10666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Unicode 3.2 CaseFolding.txt specifies for its status field: 10676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org# C: common case folding, common mappings shared by both simple and full mappings. 10696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces. 10706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org# S: simple case folding, mappings to single characters where different from F. 10716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org# T: special case for uppercase I and dotted uppercase I 10726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org# - For non-Turkic languages, this mapping is normally not used. 10736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters. 10746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org# 10756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org# Usage: 10766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org# A. To do a simple case folding, use the mappings with status C + S. 10776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org# B. To do a full case folding, use the mappings with status C + F. 10786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org# 10796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org# The mappings with status T can be used or omitted depending on the desired case-folding 10806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org# behavior. (The default option is to exclude them.) 10816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Unicode 3.2 has 'T' mappings as follows: 10836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org0049; T; 0131; # LATIN CAPITAL LETTER I 10856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE 10866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * while the default mappings for these code points are: 10886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org0049; C; 0069; # LATIN CAPITAL LETTER I 10906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE 10916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * U+0130 has no simple case folding (simple-case-folds to itself). 10936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 10946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* return the simple case folding mapping for c */ 10966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar32 U_EXPORT2 10976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucase_fold(const UCaseProps *csp, UChar32 c, uint32_t options) { 10986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t props=UTRIE2_GET16(&csp->trie, c); 10996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!PROPS_HAS_EXCEPTION(props)) { 11006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { 11016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c+=UCASE_GET_DELTA(props); 11026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 11046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *pe=GET_EXCEPTIONS(csp, props); 11056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t excWord=*pe++; 11066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t idx; 11076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(excWord&UCASE_EXC_CONDITIONAL_FOLD) { 11086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* special case folding mappings, hardcoded */ 11096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((options&_FOLD_CASE_OPTIONS_MASK)==U_FOLD_CASE_DEFAULT) { 11106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* default mappings */ 11116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c==0x49) { 11126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 0049; C; 0069; # LATIN CAPITAL LETTER I */ 11136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0x69; 11146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(c==0x130) { 11156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* no simple case folding for U+0130 */ 11166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return c; 11176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 11196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Turkic mappings */ 11206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c==0x49) { 11216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 0049; T; 0131; # LATIN CAPITAL LETTER I */ 11226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0x131; 11236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(c==0x130) { 11246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */ 11256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0x69; 11266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) { 11306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org idx=UCASE_EXC_FOLD; 11316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) { 11326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org idx=UCASE_EXC_LOWER; 11336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 11346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return c; 11356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org GET_SLOT_VALUE(excWord, idx, pe, c); 11376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return c; 11396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 11406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 11426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Issue for canonical caseless match (UAX #21): 11436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve 11446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * canonical equivalence, unlike default-option casefolding. 11456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * For example, I-grave and I + grave fold to strings that are not canonically 11466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * equivalent. 11476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * For more details, see the comment in unorm_compare() in unorm.cpp 11486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and the intermediate prototype changes for Jitterbug 2021. 11496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.) 11506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 11516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This did not get fixed because it appears that it is not possible to fix 11526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * it for uppercase and lowercase characters (I-grave vs. i-grave) 11536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * together in a way that they still fold to common result strings. 11546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 11556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 11576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucase_toFullFolding(const UCaseProps *csp, UChar32 c, 11586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar **pString, 11596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t options) 11606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 11616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 result=c; 11626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t props=UTRIE2_GET16(&csp->trie, c); 11636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!PROPS_HAS_EXCEPTION(props)) { 11646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(UCASE_GET_TYPE(props)>=UCASE_UPPER) { 11656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result=c+UCASE_GET_DELTA(props); 11666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 11686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *pe=GET_EXCEPTIONS(csp, props), *pe2; 11696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t excWord=*pe++; 11706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t full, idx; 11716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pe2=pe; 11736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(excWord&UCASE_EXC_CONDITIONAL_FOLD) { 11756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* use hardcoded conditions and mappings */ 11766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((options&_FOLD_CASE_OPTIONS_MASK)==U_FOLD_CASE_DEFAULT) { 11776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* default mappings */ 11786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c==0x49) { 11796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 0049; C; 0069; # LATIN CAPITAL LETTER I */ 11806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0x69; 11816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(c==0x130) { 11826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */ 11836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pString=iDot; 11846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 2; 11856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 11876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Turkic mappings */ 11886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c==0x49) { 11896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 0049; T; 0131; # LATIN CAPITAL LETTER I */ 11906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0x131; 11916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(c==0x130) { 11926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */ 11936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0x69; 11946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) { 11976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full); 11986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* start of full case mapping strings */ 12006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++pe; 12016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* skip the lowercase result string */ 12036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pe+=full&UCASE_FULL_LOWER; 12046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org full=(full>>4)&0xf; 12056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(full!=0) { 12076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set the output pointer to the result string */ 12086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pString=reinterpret_cast<const UChar *>(pe); 12096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* return the string length */ 12116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return full; 12126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) { 12166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org idx=UCASE_EXC_FOLD; 12176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) { 12186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org idx=UCASE_EXC_LOWER; 12196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 12206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return ~c; 12216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org GET_SLOT_VALUE(excWord, idx, pe2, result); 12236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (result==c) ? ~result : result; 12266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 12276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* case mapping properties API ---------------------------------------------- */ 12296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define GET_CASE_PROPS() &ucase_props_singleton 12316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* public API (see uchar.h) */ 12336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UBool U_EXPORT2 12356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_isULowercase(UChar32 c) { 12366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (UBool)(UCASE_LOWER==ucase_getType(GET_CASE_PROPS(), c)); 12376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 12386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UBool U_EXPORT2 12406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_isUUppercase(UChar32 c) { 12416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (UBool)(UCASE_UPPER==ucase_getType(GET_CASE_PROPS(), c)); 12426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 12436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Transforms the Unicode character to its lower case equivalent.*/ 12456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar32 U_EXPORT2 12466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_tolower(UChar32 c) { 12476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return ucase_tolower(GET_CASE_PROPS(), c); 12486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 12496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Transforms the Unicode character to its upper case equivalent.*/ 12516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar32 U_EXPORT2 12526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_toupper(UChar32 c) { 12536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return ucase_toupper(GET_CASE_PROPS(), c); 12546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 12556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Transforms the Unicode character to its title case equivalent.*/ 12576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar32 U_EXPORT2 12586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_totitle(UChar32 c) { 12596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return ucase_totitle(GET_CASE_PROPS(), c); 12606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 12616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* return the simple case folding mapping for c */ 12636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UChar32 U_EXPORT2 12646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgu_foldCase(UChar32 c, uint32_t options) { 12656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return ucase_fold(GET_CASE_PROPS(), c, options); 12666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 12676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC int32_t U_EXPORT2 12696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucase_hasBinaryProperty(UChar32 c, UProperty which) { 12706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* case mapping properties */ 12716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *resultString; 12726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t locCache; 12736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UCaseProps *csp=GET_CASE_PROPS(); 12746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(csp==NULL) { 12756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 12766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch(which) { 12786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case UCHAR_LOWERCASE: 12796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (UBool)(UCASE_LOWER==ucase_getType(csp, c)); 12806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case UCHAR_UPPERCASE: 12816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (UBool)(UCASE_UPPER==ucase_getType(csp, c)); 12826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case UCHAR_SOFT_DOTTED: 12836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return ucase_isSoftDotted(csp, c); 12846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case UCHAR_CASE_SENSITIVE: 12856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return ucase_isCaseSensitive(csp, c); 12866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case UCHAR_CASED: 12876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (UBool)(UCASE_NONE!=ucase_getType(csp, c)); 12886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case UCHAR_CASE_IGNORABLE: 12896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (UBool)(ucase_getTypeOrIgnorable(csp, c)>>2); 12906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 12916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Note: The following Changes_When_Xyz are defined as testing whether 12926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the NFD form of the input changes when Xyz-case-mapped. 12936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * However, this simpler implementation of these properties, 12946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * ignoring NFD, passes the tests. 12956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The implementation needs to be changed if the tests start failing. 12966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * When that happens, optimizations should be used to work with the 12976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * per-single-code point ucase_toFullXyz() functions unless 12986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the NFD form has more than one code point, 12996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and the property starts set needs to be the union of the 13006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * start sets for normalization and case mappings. 13016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 13026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case UCHAR_CHANGES_WHEN_LOWERCASED: 13036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org locCache=UCASE_LOC_ROOT; 13046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (UBool)(ucase_toFullLower(csp, c, NULL, NULL, &resultString, "", &locCache)>=0); 13056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case UCHAR_CHANGES_WHEN_UPPERCASED: 13066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org locCache=UCASE_LOC_ROOT; 13076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (UBool)(ucase_toFullUpper(csp, c, NULL, NULL, &resultString, "", &locCache)>=0); 13086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case UCHAR_CHANGES_WHEN_TITLECASED: 13096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org locCache=UCASE_LOC_ROOT; 13106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (UBool)(ucase_toFullTitle(csp, c, NULL, NULL, &resultString, "", &locCache)>=0); 13116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* case UCHAR_CHANGES_WHEN_CASEFOLDED: -- in uprops.c */ 13126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case UCHAR_CHANGES_WHEN_CASEMAPPED: 13136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org locCache=UCASE_LOC_ROOT; 13146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (UBool)( 13156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucase_toFullLower(csp, c, NULL, NULL, &resultString, "", &locCache)>=0 || 13166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucase_toFullUpper(csp, c, NULL, NULL, &resultString, "", &locCache)>=0 || 13176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucase_toFullTitle(csp, c, NULL, NULL, &resultString, "", &locCache)>=0); 13186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: 13196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 13206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1322