1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 459d709d503bab6e2b61931737e662dd293b40578ccornelius* Copyright (C) 2002-2013, International Business Machines 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* file name: uprops.cpp 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* encoding: US-ASCII 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* tab size: 8 (not used) 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* indentation:4 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* created on: 2002feb24 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* created by: Markus W. Scherer 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Implementations for mostly non-core Unicode character properties 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* stored in uprops.icu. 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* With the APIs implemented here, almost all properties files and 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* their associated implementation files are used from this file, 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* including those for normalization and case mappings. 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h" 2627f654740f2a26ad62a5c155af9199af9e69b889claireho#include "unicode/unorm2.h" 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uscript.h" 2850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/ustring.h" 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h" 3050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "normalizer2impl.h" 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucln_cmn.h" 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "umutex.h" 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ubidi_props.h" 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uprops.h" 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucase.h" 3627f654740f2a26ad62a5c155af9199af9e69b889claireho#include "ustr_imp.h" 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_USE 4150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 4227f654740f2a26ad62a5c155af9199af9e69b889claireho#define GET_BIDI_PROPS() ubidi_getSingleton() 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4427f654740f2a26ad62a5c155af9199af9e69b889claireho/* general properties API functions ----------------------------------------- */ 4527f654740f2a26ad62a5c155af9199af9e69b889claireho 4627f654740f2a26ad62a5c155af9199af9e69b889clairehostruct BinaryProperty; 4727f654740f2a26ad62a5c155af9199af9e69b889claireho 4827f654740f2a26ad62a5c155af9199af9e69b889clairehotypedef UBool BinaryPropertyContains(const BinaryProperty &prop, UChar32 c, UProperty which); 4927f654740f2a26ad62a5c155af9199af9e69b889claireho 5027f654740f2a26ad62a5c155af9199af9e69b889clairehostruct BinaryProperty { 5127f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t column; // SRC_PROPSVEC column, or "source" if mask==0 5227f654740f2a26ad62a5c155af9199af9e69b889claireho uint32_t mask; 5327f654740f2a26ad62a5c155af9199af9e69b889claireho BinaryPropertyContains *contains; 5427f654740f2a26ad62a5c155af9199af9e69b889claireho}; 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5627f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool defaultContains(const BinaryProperty &prop, UChar32 c, UProperty /*which*/) { 5727f654740f2a26ad62a5c155af9199af9e69b889claireho /* systematic, directly stored properties */ 5827f654740f2a26ad62a5c155af9199af9e69b889claireho return (u_getUnicodeProperties(c, prop.column)&prop.mask)!=0; 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6127f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) { 6227f654740f2a26ad62a5c155af9199af9e69b889claireho return ucase_hasBinaryProperty(c, which); 6327f654740f2a26ad62a5c155af9199af9e69b889claireho} 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6527f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 6627f654740f2a26ad62a5c155af9199af9e69b889claireho return ubidi_isBidiControl(GET_BIDI_PROPS(), c); 6727f654740f2a26ad62a5c155af9199af9e69b889claireho} 6827f654740f2a26ad62a5c155af9199af9e69b889claireho 6927f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool isMirrored(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 7027f654740f2a26ad62a5c155af9199af9e69b889claireho return ubidi_isMirrored(GET_BIDI_PROPS(), c); 7127f654740f2a26ad62a5c155af9199af9e69b889claireho} 7227f654740f2a26ad62a5c155af9199af9e69b889claireho 7327f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool isJoinControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 7427f654740f2a26ad62a5c155af9199af9e69b889claireho return ubidi_isJoinControl(GET_BIDI_PROPS(), c); 7527f654740f2a26ad62a5c155af9199af9e69b889claireho} 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7727f654740f2a26ad62a5c155af9199af9e69b889claireho#if UCONFIG_NO_NORMALIZATION 7827f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool hasFullCompositionExclusion(const BinaryProperty &, UChar32, UProperty) { 7927f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 8027f654740f2a26ad62a5c155af9199af9e69b889claireho} 8127f654740f2a26ad62a5c155af9199af9e69b889claireho#else 8227f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool hasFullCompositionExclusion(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 8327f654740f2a26ad62a5c155af9199af9e69b889claireho // By definition, Full_Composition_Exclusion is the same as NFC_QC=No. 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 8527f654740f2a26ad62a5c155af9199af9e69b889claireho const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); 8627f654740f2a26ad62a5c155af9199af9e69b889claireho return U_SUCCESS(errorCode) && impl->isCompNo(impl->getNorm16(c)); 8727f654740f2a26ad62a5c155af9199af9e69b889claireho} 8827f654740f2a26ad62a5c155af9199af9e69b889claireho#endif 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 9027f654740f2a26ad62a5c155af9199af9e69b889claireho// UCHAR_NF*_INERT properties 9127f654740f2a26ad62a5c155af9199af9e69b889claireho#if UCONFIG_NO_NORMALIZATION 9227f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool isNormInert(const BinaryProperty &, UChar32, UProperty) { 9327f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 9427f654740f2a26ad62a5c155af9199af9e69b889claireho} 9527f654740f2a26ad62a5c155af9199af9e69b889claireho#else 9627f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool isNormInert(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) { 9727f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode errorCode=U_ZERO_ERROR; 9827f654740f2a26ad62a5c155af9199af9e69b889claireho const Normalizer2 *norm2=Normalizer2Factory::getInstance( 9927f654740f2a26ad62a5c155af9199af9e69b889claireho (UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode); 10027f654740f2a26ad62a5c155af9199af9e69b889claireho return U_SUCCESS(errorCode) && norm2->isInert(c); 10127f654740f2a26ad62a5c155af9199af9e69b889claireho} 10227f654740f2a26ad62a5c155af9199af9e69b889claireho#endif 10327f654740f2a26ad62a5c155af9199af9e69b889claireho 10427f654740f2a26ad62a5c155af9199af9e69b889claireho#if UCONFIG_NO_NORMALIZATION 10527f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool changesWhenCasefolded(const BinaryProperty &, UChar32, UProperty) { 10627f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 10727f654740f2a26ad62a5c155af9199af9e69b889claireho} 10827f654740f2a26ad62a5c155af9199af9e69b889claireho#else 10927f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 11027f654740f2a26ad62a5c155af9199af9e69b889claireho UnicodeString nfd; 11127f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode errorCode=U_ZERO_ERROR; 11227f654740f2a26ad62a5c155af9199af9e69b889claireho const Normalizer2 *nfcNorm2=Normalizer2Factory::getNFCInstance(errorCode); 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 11427f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 11527f654740f2a26ad62a5c155af9199af9e69b889claireho } 11627f654740f2a26ad62a5c155af9199af9e69b889claireho if(nfcNorm2->getDecomposition(c, nfd)) { 11727f654740f2a26ad62a5c155af9199af9e69b889claireho /* c has a decomposition */ 11827f654740f2a26ad62a5c155af9199af9e69b889claireho if(nfd.length()==1) { 11927f654740f2a26ad62a5c155af9199af9e69b889claireho c=nfd[0]; /* single BMP code point */ 12027f654740f2a26ad62a5c155af9199af9e69b889claireho } else if(nfd.length()<=U16_MAX_LENGTH && 12127f654740f2a26ad62a5c155af9199af9e69b889claireho nfd.length()==U16_LENGTH(c=nfd.char32At(0)) 12227f654740f2a26ad62a5c155af9199af9e69b889claireho ) { 12327f654740f2a26ad62a5c155af9199af9e69b889claireho /* single supplementary code point */ 12427f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 12527f654740f2a26ad62a5c155af9199af9e69b889claireho c=U_SENTINEL; 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 12727f654740f2a26ad62a5c155af9199af9e69b889claireho } else if(c<0) { 12827f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; /* protect against bad input */ 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 13027f654740f2a26ad62a5c155af9199af9e69b889claireho if(c>=0) { 13127f654740f2a26ad62a5c155af9199af9e69b889claireho /* single code point */ 13227f654740f2a26ad62a5c155af9199af9e69b889claireho const UCaseProps *csp=ucase_getSingleton(); 13327f654740f2a26ad62a5c155af9199af9e69b889claireho const UChar *resultString; 13427f654740f2a26ad62a5c155af9199af9e69b889claireho return (UBool)(ucase_toFullFolding(csp, c, &resultString, U_FOLD_CASE_DEFAULT)>=0); 13527f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 13627f654740f2a26ad62a5c155af9199af9e69b889claireho /* guess some large but stack-friendly capacity */ 13727f654740f2a26ad62a5c155af9199af9e69b889claireho UChar dest[2*UCASE_MAX_STRING_LENGTH]; 13827f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t destLength; 13927f654740f2a26ad62a5c155af9199af9e69b889claireho destLength=u_strFoldCase(dest, LENGTHOF(dest), 14027f654740f2a26ad62a5c155af9199af9e69b889claireho nfd.getBuffer(), nfd.length(), 14127f654740f2a26ad62a5c155af9199af9e69b889claireho U_FOLD_CASE_DEFAULT, &errorCode); 14227f654740f2a26ad62a5c155af9199af9e69b889claireho return (UBool)(U_SUCCESS(errorCode) && 14327f654740f2a26ad62a5c155af9199af9e69b889claireho 0!=u_strCompare(nfd.getBuffer(), nfd.length(), 14427f654740f2a26ad62a5c155af9199af9e69b889claireho dest, destLength, FALSE)); 14527f654740f2a26ad62a5c155af9199af9e69b889claireho } 14627f654740f2a26ad62a5c155af9199af9e69b889claireho} 147c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#endif 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 14927f654740f2a26ad62a5c155af9199af9e69b889claireho#if UCONFIG_NO_NORMALIZATION 15027f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool changesWhenNFKC_Casefolded(const BinaryProperty &, UChar32, UProperty) { 15127f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 15227f654740f2a26ad62a5c155af9199af9e69b889claireho} 15327f654740f2a26ad62a5c155af9199af9e69b889claireho#else 15427f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool changesWhenNFKC_Casefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 15527f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode errorCode=U_ZERO_ERROR; 15627f654740f2a26ad62a5c155af9199af9e69b889claireho const Normalizer2Impl *kcf=Normalizer2Factory::getNFKC_CFImpl(errorCode); 15727f654740f2a26ad62a5c155af9199af9e69b889claireho if(U_FAILURE(errorCode)) { 15827f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 15927f654740f2a26ad62a5c155af9199af9e69b889claireho } 16027f654740f2a26ad62a5c155af9199af9e69b889claireho UnicodeString src(c); 16127f654740f2a26ad62a5c155af9199af9e69b889claireho UnicodeString dest; 16227f654740f2a26ad62a5c155af9199af9e69b889claireho { 16327f654740f2a26ad62a5c155af9199af9e69b889claireho // The ReorderingBuffer must be in a block because its destructor 16427f654740f2a26ad62a5c155af9199af9e69b889claireho // needs to release dest's buffer before we look at its contents. 16527f654740f2a26ad62a5c155af9199af9e69b889claireho ReorderingBuffer buffer(*kcf, dest); 16627f654740f2a26ad62a5c155af9199af9e69b889claireho // Small destCapacity for NFKC_CF(c). 16727f654740f2a26ad62a5c155af9199af9e69b889claireho if(buffer.init(5, errorCode)) { 16827f654740f2a26ad62a5c155af9199af9e69b889claireho const UChar *srcArray=src.getBuffer(); 16927f654740f2a26ad62a5c155af9199af9e69b889claireho kcf->compose(srcArray, srcArray+src.length(), FALSE, 17027f654740f2a26ad62a5c155af9199af9e69b889claireho TRUE, buffer, errorCode); 17127f654740f2a26ad62a5c155af9199af9e69b889claireho } 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 17327f654740f2a26ad62a5c155af9199af9e69b889claireho return U_SUCCESS(errorCode) && dest!=src; 17427f654740f2a26ad62a5c155af9199af9e69b889claireho} 17527f654740f2a26ad62a5c155af9199af9e69b889claireho#endif 17627f654740f2a26ad62a5c155af9199af9e69b889claireho 17727f654740f2a26ad62a5c155af9199af9e69b889claireho#if UCONFIG_NO_NORMALIZATION 17827f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool isCanonSegmentStarter(const BinaryProperty &, UChar32, UProperty) { 17927f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 18027f654740f2a26ad62a5c155af9199af9e69b889claireho} 18127f654740f2a26ad62a5c155af9199af9e69b889claireho#else 18227f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool isCanonSegmentStarter(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 18327f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode errorCode=U_ZERO_ERROR; 18427f654740f2a26ad62a5c155af9199af9e69b889claireho const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); 18527f654740f2a26ad62a5c155af9199af9e69b889claireho return 18627f654740f2a26ad62a5c155af9199af9e69b889claireho U_SUCCESS(errorCode) && impl->ensureCanonIterData(errorCode) && 18727f654740f2a26ad62a5c155af9199af9e69b889claireho impl->isCanonSegmentStarter(c); 18827f654740f2a26ad62a5c155af9199af9e69b889claireho} 18927f654740f2a26ad62a5c155af9199af9e69b889claireho#endif 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 19127f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool isPOSIX_alnum(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 19227f654740f2a26ad62a5c155af9199af9e69b889claireho return u_isalnumPOSIX(c); 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 19527f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool isPOSIX_blank(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 19627f654740f2a26ad62a5c155af9199af9e69b889claireho return u_isblank(c); 19727f654740f2a26ad62a5c155af9199af9e69b889claireho} 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 19927f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool isPOSIX_graph(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 20027f654740f2a26ad62a5c155af9199af9e69b889claireho return u_isgraphPOSIX(c); 20127f654740f2a26ad62a5c155af9199af9e69b889claireho} 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 20327f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool isPOSIX_print(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 20427f654740f2a26ad62a5c155af9199af9e69b889claireho return u_isprintPOSIX(c); 20527f654740f2a26ad62a5c155af9199af9e69b889claireho} 20627f654740f2a26ad62a5c155af9199af9e69b889claireho 20727f654740f2a26ad62a5c155af9199af9e69b889clairehostatic UBool isPOSIX_xdigit(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 20827f654740f2a26ad62a5c155af9199af9e69b889claireho return u_isxdigit(c); 20927f654740f2a26ad62a5c155af9199af9e69b889claireho} 21027f654740f2a26ad62a5c155af9199af9e69b889claireho 21127f654740f2a26ad62a5c155af9199af9e69b889clairehostatic const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={ 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * column and mask values for binary properties from u_getUnicodeProperties(). 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Must be in order of corresponding UProperty, 21550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and there must be exactly one entry per binary UProperty. 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 21783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius * Properties with mask==0 are handled in code. 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For them, column is the UPropertySource value. 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 22027f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_ALPHABETIC), defaultContains }, 22127f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_ASCII_HEX_DIGIT), defaultContains }, 22227f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_BIDI, 0, isBidiControl }, 22327f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_BIDI, 0, isMirrored }, 22427f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_DASH), defaultContains }, 22527f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_DEFAULT_IGNORABLE_CODE_POINT), defaultContains }, 22627f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_DEPRECATED), defaultContains }, 22727f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_DIACRITIC), defaultContains }, 22827f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_EXTENDER), defaultContains }, 22927f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_NFC, 0, hasFullCompositionExclusion }, 23027f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_GRAPHEME_BASE), defaultContains }, 23127f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_GRAPHEME_EXTEND), defaultContains }, 23227f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_GRAPHEME_LINK), defaultContains }, 23327f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_HEX_DIGIT), defaultContains }, 23427f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_HYPHEN), defaultContains }, 23527f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_ID_CONTINUE), defaultContains }, 23627f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_ID_START), defaultContains }, 23727f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_IDEOGRAPHIC), defaultContains }, 23827f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_IDS_BINARY_OPERATOR), defaultContains }, 23927f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_IDS_TRINARY_OPERATOR), defaultContains }, 24027f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_BIDI, 0, isJoinControl }, 24127f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION), defaultContains }, 24227f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_LOWERCASE 24327f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_MATH), defaultContains }, 24427f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_NONCHARACTER_CODE_POINT), defaultContains }, 24527f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_QUOTATION_MARK), defaultContains }, 24627f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_RADICAL), defaultContains }, 24727f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_SOFT_DOTTED 24827f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_TERMINAL_PUNCTUATION), defaultContains }, 24927f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_UNIFIED_IDEOGRAPH), defaultContains }, 25027f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_UPPERCASE 25127f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_WHITE_SPACE), defaultContains }, 25227f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_XID_CONTINUE), defaultContains }, 25327f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_XID_START), defaultContains }, 25427f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_SENSITIVE 25527f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_S_TERM), defaultContains }, 25627f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_VARIATION_SELECTOR), defaultContains }, 25727f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFD_INERT 25827f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKD_INERT 25927f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFC_INERT 26027f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKC_INERT 26127f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_NFC_CANON_ITER, 0, isCanonSegmentStarter }, 26227f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_PATTERN_SYNTAX), defaultContains }, 26327f654740f2a26ad62a5c155af9199af9e69b889claireho { 1, U_MASK(UPROPS_PATTERN_WHITE_SPACE), defaultContains }, 26427f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_CHAR_AND_PROPSVEC, 0, isPOSIX_alnum }, 26527f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_CHAR, 0, isPOSIX_blank }, 26627f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_CHAR, 0, isPOSIX_graph }, 26727f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_CHAR, 0, isPOSIX_print }, 26827f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_CHAR, 0, isPOSIX_xdigit }, 26927f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASED 27027f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_IGNORABLE 27127f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_LOWERCASED 27227f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_UPPERCASED 27327f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_TITLECASED 27427f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_CASE_AND_NORM, 0, changesWhenCasefolded }, 27527f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_CASEMAPPED 27627f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_NFKC_CF, 0, changesWhenNFKC_Casefolded } 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UBool U_EXPORT2 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_hasBinaryProperty(UChar32 c, UProperty which) { 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* c is range-checked in the functions that are called from here */ 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(which<UCHAR_BINARY_START || UCHAR_BINARY_LIMIT<=which) { 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* not a known binary property */ 28427f654740f2a26ad62a5c155af9199af9e69b889claireho return FALSE; 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 28627f654740f2a26ad62a5c155af9199af9e69b889claireho const BinaryProperty &prop=binProps[which]; 28727f654740f2a26ad62a5c155af9199af9e69b889claireho return prop.contains(prop, c, which); 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 29127f654740f2a26ad62a5c155af9199af9e69b889clairehostruct IntProperty; 29227f654740f2a26ad62a5c155af9199af9e69b889claireho 29327f654740f2a26ad62a5c155af9199af9e69b889clairehotypedef int32_t IntPropertyGetValue(const IntProperty &prop, UChar32 c, UProperty which); 29427f654740f2a26ad62a5c155af9199af9e69b889clairehotypedef int32_t IntPropertyGetMaxValue(const IntProperty &prop, UProperty which); 29527f654740f2a26ad62a5c155af9199af9e69b889claireho 29627f654740f2a26ad62a5c155af9199af9e69b889clairehostruct IntProperty { 29727f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t column; // SRC_PROPSVEC column, or "source" if mask==0 29827f654740f2a26ad62a5c155af9199af9e69b889claireho uint32_t mask; 29927f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t shift; // =maxValue if getMaxValueFromShift() is used 30027f654740f2a26ad62a5c155af9199af9e69b889claireho IntPropertyGetValue *getValue; 30127f654740f2a26ad62a5c155af9199af9e69b889claireho IntPropertyGetMaxValue *getMaxValue; 30227f654740f2a26ad62a5c155af9199af9e69b889claireho}; 30327f654740f2a26ad62a5c155af9199af9e69b889claireho 30427f654740f2a26ad62a5c155af9199af9e69b889clairehostatic int32_t defaultGetValue(const IntProperty &prop, UChar32 c, UProperty /*which*/) { 30527f654740f2a26ad62a5c155af9199af9e69b889claireho /* systematic, directly stored properties */ 30627f654740f2a26ad62a5c155af9199af9e69b889claireho return (int32_t)(u_getUnicodeProperties(c, prop.column)&prop.mask)>>prop.shift; 30727f654740f2a26ad62a5c155af9199af9e69b889claireho} 30827f654740f2a26ad62a5c155af9199af9e69b889claireho 30927f654740f2a26ad62a5c155af9199af9e69b889clairehostatic int32_t defaultGetMaxValue(const IntProperty &prop, UProperty /*which*/) { 31027f654740f2a26ad62a5c155af9199af9e69b889claireho return (uprv_getMaxValues(prop.column)&prop.mask)>>prop.shift; 31127f654740f2a26ad62a5c155af9199af9e69b889claireho} 31227f654740f2a26ad62a5c155af9199af9e69b889claireho 31327f654740f2a26ad62a5c155af9199af9e69b889clairehostatic int32_t getMaxValueFromShift(const IntProperty &prop, UProperty /*which*/) { 31427f654740f2a26ad62a5c155af9199af9e69b889claireho return prop.shift; 31527f654740f2a26ad62a5c155af9199af9e69b889claireho} 31627f654740f2a26ad62a5c155af9199af9e69b889claireho 31727f654740f2a26ad62a5c155af9199af9e69b889clairehostatic int32_t getBiDiClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 31827f654740f2a26ad62a5c155af9199af9e69b889claireho return (int32_t)u_charDirection(c); 31927f654740f2a26ad62a5c155af9199af9e69b889claireho} 32027f654740f2a26ad62a5c155af9199af9e69b889claireho 32159d709d503bab6e2b61931737e662dd293b40578ccorneliusstatic int32_t getBiDiPairedBracketType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 32259d709d503bab6e2b61931737e662dd293b40578ccornelius return (int32_t)ubidi_getPairedBracketType(GET_BIDI_PROPS(), c); 32359d709d503bab6e2b61931737e662dd293b40578ccornelius} 32459d709d503bab6e2b61931737e662dd293b40578ccornelius 32527f654740f2a26ad62a5c155af9199af9e69b889clairehostatic int32_t biDiGetMaxValue(const IntProperty &/*prop*/, UProperty which) { 32627f654740f2a26ad62a5c155af9199af9e69b889claireho return ubidi_getMaxValue(GET_BIDI_PROPS(), which); 32727f654740f2a26ad62a5c155af9199af9e69b889claireho} 32827f654740f2a26ad62a5c155af9199af9e69b889claireho 32927f654740f2a26ad62a5c155af9199af9e69b889claireho#if UCONFIG_NO_NORMALIZATION 33027f654740f2a26ad62a5c155af9199af9e69b889clairehostatic int32_t getCombiningClass(const IntProperty &, UChar32, UProperty) { 33127f654740f2a26ad62a5c155af9199af9e69b889claireho return 0; 33227f654740f2a26ad62a5c155af9199af9e69b889claireho} 33327f654740f2a26ad62a5c155af9199af9e69b889claireho#else 33427f654740f2a26ad62a5c155af9199af9e69b889clairehostatic int32_t getCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 33527f654740f2a26ad62a5c155af9199af9e69b889claireho return u_getCombiningClass(c); 33627f654740f2a26ad62a5c155af9199af9e69b889claireho} 33727f654740f2a26ad62a5c155af9199af9e69b889claireho#endif 33827f654740f2a26ad62a5c155af9199af9e69b889claireho 33927f654740f2a26ad62a5c155af9199af9e69b889clairehostatic int32_t getGeneralCategory(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 34027f654740f2a26ad62a5c155af9199af9e69b889claireho return (int32_t)u_charType(c); 34127f654740f2a26ad62a5c155af9199af9e69b889claireho} 34227f654740f2a26ad62a5c155af9199af9e69b889claireho 34327f654740f2a26ad62a5c155af9199af9e69b889clairehostatic int32_t getJoiningGroup(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 34427f654740f2a26ad62a5c155af9199af9e69b889claireho return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c); 34527f654740f2a26ad62a5c155af9199af9e69b889claireho} 34627f654740f2a26ad62a5c155af9199af9e69b889claireho 34727f654740f2a26ad62a5c155af9199af9e69b889clairehostatic int32_t getJoiningType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 34827f654740f2a26ad62a5c155af9199af9e69b889claireho return ubidi_getJoiningType(GET_BIDI_PROPS(), c); 34927f654740f2a26ad62a5c155af9199af9e69b889claireho} 35027f654740f2a26ad62a5c155af9199af9e69b889claireho 35127f654740f2a26ad62a5c155af9199af9e69b889clairehostatic int32_t getNumericType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 35283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getMainProperties(c)); 35327f654740f2a26ad62a5c155af9199af9e69b889claireho return UPROPS_NTV_GET_TYPE(ntv); 35427f654740f2a26ad62a5c155af9199af9e69b889claireho} 35527f654740f2a26ad62a5c155af9199af9e69b889claireho 35627f654740f2a26ad62a5c155af9199af9e69b889clairehostatic int32_t getScript(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 35727f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode errorCode=U_ZERO_ERROR; 35827f654740f2a26ad62a5c155af9199af9e69b889claireho return (int32_t)uscript_getScript(c, &errorCode); 35927f654740f2a26ad62a5c155af9199af9e69b889claireho} 36027f654740f2a26ad62a5c155af9199af9e69b889claireho 36150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/* 36250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Map some of the Grapheme Cluster Break values to Hangul Syllable Types. 36350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break. 36450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 36550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic const UHangulSyllableType gcbToHst[]={ 36650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_HST_NOT_APPLICABLE, /* U_GCB_OTHER */ 36750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_HST_NOT_APPLICABLE, /* U_GCB_CONTROL */ 36850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_HST_NOT_APPLICABLE, /* U_GCB_CR */ 36950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_HST_NOT_APPLICABLE, /* U_GCB_EXTEND */ 37050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_HST_LEADING_JAMO, /* U_GCB_L */ 37150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_HST_NOT_APPLICABLE, /* U_GCB_LF */ 37250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_HST_LV_SYLLABLE, /* U_GCB_LV */ 37350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_HST_LVT_SYLLABLE, /* U_GCB_LVT */ 37450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_HST_TRAILING_JAMO, /* U_GCB_T */ 37550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho U_HST_VOWEL_JAMO /* U_GCB_V */ 37650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* 37750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Omit GCB values beyond what we need for hst. 37850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The code below checks for the array length. 37950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 38050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}; 38150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 38227f654740f2a26ad62a5c155af9199af9e69b889clairehostatic int32_t getHangulSyllableType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 38327f654740f2a26ad62a5c155af9199af9e69b889claireho /* see comments on gcbToHst[] above */ 38427f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT; 38527f654740f2a26ad62a5c155af9199af9e69b889claireho if(gcb<LENGTHOF(gcbToHst)) { 38627f654740f2a26ad62a5c155af9199af9e69b889claireho return gcbToHst[gcb]; 38727f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 38827f654740f2a26ad62a5c155af9199af9e69b889claireho return U_HST_NOT_APPLICABLE; 38927f654740f2a26ad62a5c155af9199af9e69b889claireho } 39027f654740f2a26ad62a5c155af9199af9e69b889claireho} 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 39227f654740f2a26ad62a5c155af9199af9e69b889claireho#if UCONFIG_NO_NORMALIZATION 39327f654740f2a26ad62a5c155af9199af9e69b889clairehostatic int32_t getNormQuickCheck(const IntProperty &, UChar32, UProperty) { 39427f654740f2a26ad62a5c155af9199af9e69b889claireho return 0; 39527f654740f2a26ad62a5c155af9199af9e69b889claireho} 39627f654740f2a26ad62a5c155af9199af9e69b889claireho#else 39727f654740f2a26ad62a5c155af9199af9e69b889clairehostatic int32_t getNormQuickCheck(const IntProperty &/*prop*/, UChar32 c, UProperty which) { 39827f654740f2a26ad62a5c155af9199af9e69b889claireho return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD)); 39927f654740f2a26ad62a5c155af9199af9e69b889claireho} 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 40127f654740f2a26ad62a5c155af9199af9e69b889claireho 40227f654740f2a26ad62a5c155af9199af9e69b889claireho#if UCONFIG_NO_NORMALIZATION 40327f654740f2a26ad62a5c155af9199af9e69b889clairehostatic int32_t getLeadCombiningClass(const IntProperty &, UChar32, UProperty) { 40427f654740f2a26ad62a5c155af9199af9e69b889claireho return 0; 40527f654740f2a26ad62a5c155af9199af9e69b889claireho} 40627f654740f2a26ad62a5c155af9199af9e69b889claireho#else 40727f654740f2a26ad62a5c155af9199af9e69b889clairehostatic int32_t getLeadCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 40883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return unorm_getFCD16(c)>>8; 40927f654740f2a26ad62a5c155af9199af9e69b889claireho} 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 41127f654740f2a26ad62a5c155af9199af9e69b889claireho 41227f654740f2a26ad62a5c155af9199af9e69b889claireho#if UCONFIG_NO_NORMALIZATION 41327f654740f2a26ad62a5c155af9199af9e69b889clairehostatic int32_t getTrailCombiningClass(const IntProperty &, UChar32, UProperty) { 41427f654740f2a26ad62a5c155af9199af9e69b889claireho return 0; 41527f654740f2a26ad62a5c155af9199af9e69b889claireho} 41627f654740f2a26ad62a5c155af9199af9e69b889claireho#else 41727f654740f2a26ad62a5c155af9199af9e69b889clairehostatic int32_t getTrailCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 41883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return unorm_getFCD16(c)&0xff; 41927f654740f2a26ad62a5c155af9199af9e69b889claireho} 42027f654740f2a26ad62a5c155af9199af9e69b889claireho#endif 42127f654740f2a26ad62a5c155af9199af9e69b889claireho 42227f654740f2a26ad62a5c155af9199af9e69b889clairehostatic const IntProperty intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]={ 42327f654740f2a26ad62a5c155af9199af9e69b889claireho /* 42427f654740f2a26ad62a5c155af9199af9e69b889claireho * column, mask and shift values for int-value properties from u_getUnicodeProperties(). 42527f654740f2a26ad62a5c155af9199af9e69b889claireho * Must be in order of corresponding UProperty, 42627f654740f2a26ad62a5c155af9199af9e69b889claireho * and there must be exactly one entry per int UProperty. 42727f654740f2a26ad62a5c155af9199af9e69b889claireho * 42883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius * Properties with mask==0 are handled in code. 42927f654740f2a26ad62a5c155af9199af9e69b889claireho * For them, column is the UPropertySource value. 43027f654740f2a26ad62a5c155af9199af9e69b889claireho */ 43127f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_BIDI, 0, 0, getBiDiClass, biDiGetMaxValue }, 43227f654740f2a26ad62a5c155af9199af9e69b889claireho { 0, UPROPS_BLOCK_MASK, UPROPS_BLOCK_SHIFT, defaultGetValue, defaultGetMaxValue }, 43327f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_NFC, 0, 0xff, getCombiningClass, getMaxValueFromShift }, 43427f654740f2a26ad62a5c155af9199af9e69b889claireho { 2, UPROPS_DT_MASK, 0, defaultGetValue, defaultGetMaxValue }, 43527f654740f2a26ad62a5c155af9199af9e69b889claireho { 0, UPROPS_EA_MASK, UPROPS_EA_SHIFT, defaultGetValue, defaultGetMaxValue }, 43627f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_CHAR, 0, (int32_t)U_CHAR_CATEGORY_COUNT-1,getGeneralCategory, getMaxValueFromShift }, 43727f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_BIDI, 0, 0, getJoiningGroup, biDiGetMaxValue }, 43827f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_BIDI, 0, 0, getJoiningType, biDiGetMaxValue }, 43927f654740f2a26ad62a5c155af9199af9e69b889claireho { 2, UPROPS_LB_MASK, UPROPS_LB_SHIFT, defaultGetValue, defaultGetMaxValue }, 44027f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_CHAR, 0, (int32_t)U_NT_COUNT-1, getNumericType, getMaxValueFromShift }, 44127f654740f2a26ad62a5c155af9199af9e69b889claireho { 0, UPROPS_SCRIPT_MASK, 0, getScript, defaultGetMaxValue }, 44227f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_PROPSVEC, 0, (int32_t)U_HST_COUNT-1, getHangulSyllableType, getMaxValueFromShift }, 44327f654740f2a26ad62a5c155af9199af9e69b889claireho // UCHAR_NFD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes" 44427f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_NFC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift }, 44527f654740f2a26ad62a5c155af9199af9e69b889claireho // UCHAR_NFKD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes" 44627f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift }, 44727f654740f2a26ad62a5c155af9199af9e69b889claireho // UCHAR_NFC_QUICK_CHECK: max=2=MAYBE 44827f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_NFC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift }, 44927f654740f2a26ad62a5c155af9199af9e69b889claireho // UCHAR_NFKC_QUICK_CHECK: max=2=MAYBE 45027f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift }, 45127f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_NFC, 0, 0xff, getLeadCombiningClass, getMaxValueFromShift }, 45227f654740f2a26ad62a5c155af9199af9e69b889claireho { UPROPS_SRC_NFC, 0, 0xff, getTrailCombiningClass, getMaxValueFromShift }, 45327f654740f2a26ad62a5c155af9199af9e69b889claireho { 2, UPROPS_GCB_MASK, UPROPS_GCB_SHIFT, defaultGetValue, defaultGetMaxValue }, 45427f654740f2a26ad62a5c155af9199af9e69b889claireho { 2, UPROPS_SB_MASK, UPROPS_SB_SHIFT, defaultGetValue, defaultGetMaxValue }, 45559d709d503bab6e2b61931737e662dd293b40578ccornelius { 2, UPROPS_WB_MASK, UPROPS_WB_SHIFT, defaultGetValue, defaultGetMaxValue }, 45659d709d503bab6e2b61931737e662dd293b40578ccornelius { UPROPS_SRC_BIDI, 0, 0, getBiDiPairedBracketType, biDiGetMaxValue }, 45727f654740f2a26ad62a5c155af9199af9e69b889claireho}; 45827f654740f2a26ad62a5c155af9199af9e69b889claireho 45927f654740f2a26ad62a5c155af9199af9e69b889clairehoU_CAPI int32_t U_EXPORT2 46027f654740f2a26ad62a5c155af9199af9e69b889clairehou_getIntPropertyValue(UChar32 c, UProperty which) { 46127f654740f2a26ad62a5c155af9199af9e69b889claireho if(which<UCHAR_INT_START) { 46227f654740f2a26ad62a5c155af9199af9e69b889claireho if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) { 46327f654740f2a26ad62a5c155af9199af9e69b889claireho const BinaryProperty &prop=binProps[which]; 46427f654740f2a26ad62a5c155af9199af9e69b889claireho return prop.contains(prop, c, which); 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 46627f654740f2a26ad62a5c155af9199af9e69b889claireho } else if(which<UCHAR_INT_LIMIT) { 46727f654740f2a26ad62a5c155af9199af9e69b889claireho const IntProperty &prop=intProps[which-UCHAR_INT_START]; 46827f654740f2a26ad62a5c155af9199af9e69b889claireho return prop.getValue(prop, c, which); 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(which==UCHAR_GENERAL_CATEGORY_MASK) { 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return U_MASK(u_charType(c)); 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 47227f654740f2a26ad62a5c155af9199af9e69b889claireho return 0; // undefined 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 47627f654740f2a26ad62a5c155af9199af9e69b889clairehou_getIntPropertyMinValue(UProperty /*which*/) { 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; /* all binary/enum/int properties have a minimum value of 0 */ 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruu_getIntPropertyMaxValue(UProperty which) { 48227f654740f2a26ad62a5c155af9199af9e69b889claireho if(which<UCHAR_INT_START) { 48327f654740f2a26ad62a5c155af9199af9e69b889claireho if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) { 48427f654740f2a26ad62a5c155af9199af9e69b889claireho return 1; // maximum TRUE for all binary properties 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 48627f654740f2a26ad62a5c155af9199af9e69b889claireho } else if(which<UCHAR_INT_LIMIT) { 48727f654740f2a26ad62a5c155af9199af9e69b889claireho const IntProperty &prop=intProps[which-UCHAR_INT_START]; 48827f654740f2a26ad62a5c155af9199af9e69b889claireho return prop.getMaxValue(prop, which); 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 49027f654740f2a26ad62a5c155af9199af9e69b889claireho return -1; // undefined 491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC UPropertySource U_EXPORT2 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruuprops_getSource(UProperty which) { 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(which<UCHAR_BINARY_START) { 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UPROPS_SRC_NONE; /* undefined */ 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(which<UCHAR_BINARY_LIMIT) { 49827f654740f2a26ad62a5c155af9199af9e69b889claireho const BinaryProperty &prop=binProps[which]; 49927f654740f2a26ad62a5c155af9199af9e69b889claireho if(prop.mask!=0) { 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UPROPS_SRC_PROPSVEC; 501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 50227f654740f2a26ad62a5c155af9199af9e69b889claireho return (UPropertySource)prop.column; 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(which<UCHAR_INT_START) { 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UPROPS_SRC_NONE; /* undefined */ 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(which<UCHAR_INT_LIMIT) { 50727f654740f2a26ad62a5c155af9199af9e69b889claireho const IntProperty &prop=intProps[which-UCHAR_INT_START]; 50827f654740f2a26ad62a5c155af9199af9e69b889claireho if(prop.mask!=0) { 509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UPROPS_SRC_PROPSVEC; 51027f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 51127f654740f2a26ad62a5c155af9199af9e69b889claireho return (UPropertySource)prop.column; 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(which<UCHAR_STRING_START) { 514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(which) { 515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCHAR_GENERAL_CATEGORY_MASK: 516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCHAR_NUMERIC_VALUE: 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UPROPS_SRC_CHAR; 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UPROPS_SRC_NONE; 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(which<UCHAR_STRING_LIMIT) { 523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(which) { 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCHAR_AGE: 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UPROPS_SRC_PROPSVEC; 526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCHAR_BIDI_MIRRORING_GLYPH: 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UPROPS_SRC_BIDI; 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCHAR_CASE_FOLDING: 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCHAR_LOWERCASE_MAPPING: 532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCHAR_SIMPLE_CASE_FOLDING: 533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCHAR_SIMPLE_LOWERCASE_MAPPING: 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCHAR_SIMPLE_TITLECASE_MAPPING: 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCHAR_SIMPLE_UPPERCASE_MAPPING: 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCHAR_TITLECASE_MAPPING: 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCHAR_UPPERCASE_MAPPING: 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UPROPS_SRC_CASE; 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCHAR_ISO_COMMENT: 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCHAR_NAME: 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case UCHAR_UNICODE_1_NAME: 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UPROPS_SRC_NAMES; 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: 546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return UPROPS_SRC_NONE; 547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 54927f654740f2a26ad62a5c155af9199af9e69b889claireho switch(which) { 55027f654740f2a26ad62a5c155af9199af9e69b889claireho case UCHAR_SCRIPT_EXTENSIONS: 55127f654740f2a26ad62a5c155af9199af9e69b889claireho return UPROPS_SRC_PROPSVEC; 55227f654740f2a26ad62a5c155af9199af9e69b889claireho default: 55327f654740f2a26ad62a5c155af9199af9e69b889claireho return UPROPS_SRC_NONE; /* undefined */ 55427f654740f2a26ad62a5c155af9199af9e69b889claireho } 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 55850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_NORMALIZATION 55950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 56050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI int32_t U_EXPORT2 56150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehou_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode) { 56250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 56350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 56450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 56550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(destCapacity<0 || (dest==NULL && destCapacity>0)) { 56650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 56750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 56850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 56950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Compute the FC_NFKC_Closure on the fly: 57050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // We have the API for complete coverage of Unicode properties, although 57150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // this value by itself is not useful via API. 57250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // (What could be useful is a custom normalization table that combines 57350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // case folding and NFKC.) 57450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // For the derivation, see Unicode's DerivedNormalizationProps.txt. 57550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const Normalizer2 *nfkc=Normalizer2Factory::getNFKCInstance(*pErrorCode); 57627f654740f2a26ad62a5c155af9199af9e69b889claireho const UCaseProps *csp=ucase_getSingleton(); 57750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*pErrorCode)) { 57850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return 0; 57950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 58050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // first: b = NFKC(Fold(a)) 58150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString folded1String; 58250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *folded1; 58350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho int32_t folded1Length=ucase_toFullFolding(csp, c, &folded1, U_FOLD_CASE_DEFAULT); 58450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(folded1Length<0) { 58550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const Normalizer2Impl *nfkcImpl=Normalizer2Factory::getImpl(nfkc); 58650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(nfkcImpl->getCompQuickCheck(nfkcImpl->getNorm16(c))!=UNORM_NO) { 58750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return u_terminateUChars(dest, destCapacity, 0, pErrorCode); // c does not change at all under CaseFolding+NFKC 58850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 58950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho folded1String.setTo(c); 59050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 59150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(folded1Length>UCASE_MAX_STRING_LENGTH) { 59250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho folded1String.setTo(folded1Length); 59350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 59450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho folded1String.setTo(FALSE, folded1, folded1Length); 59550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 59650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 59750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString kc1=nfkc->normalize(folded1String, *pErrorCode); 59850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // second: c = NFKC(Fold(b)) 59950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString folded2String(kc1); 60050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString kc2=nfkc->normalize(folded2String.foldCase(), *pErrorCode); 60150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // if (c != b) add the mapping from a to c 60250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(*pErrorCode) || kc1==kc2) { 60350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return u_terminateUChars(dest, destCapacity, 0, pErrorCode); 60450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 60550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return kc2.extract(dest, destCapacity, *pErrorCode); 60650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 60750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 610