normalizer2.h revision 50294ead5e5d23f5bbfed76e00e6b510bd41eee1
150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/* 250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho******************************************************************************* 350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* 450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* Copyright (C) 2009-2010, International Business Machines 550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* Corporation and others. All Rights Reserved. 650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* 750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho******************************************************************************* 850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* file name: normalizer2.h 950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* encoding: US-ASCII 1050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* tab size: 8 (not used) 1150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* indentation:4 1250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* 1350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* created on: 2009nov22 1450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* created by: Markus W. Scherer 1550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*/ 1650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#ifndef __NORMALIZER2_H__ 1850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#define __NORMALIZER2_H__ 1950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/** 2150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * \file 2250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * \brief C++ API: New API for Unicode Normalization. 2350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 2450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/utypes.h" 2650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_NORMALIZATION 2850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/uniset.h" 3050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/unistr.h" 3150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/unorm2.h" 3250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 3350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_BEGIN 3450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 3550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/** 3650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Unicode normalization functionality for standard Unicode normalization or 3750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * for using custom mapping tables. 3850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * All instances of this class are unmodifiable/immutable. 3950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Instances returned by getInstance() are singletons that must not be deleted by the caller. 4050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 4150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The primary functions are to produce a normalized string and to detect whether 4250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * a string is already normalized. 4350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The most commonly used normalization forms are those defined in 4450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * http://www.unicode.org/unicode/reports/tr15/ 4550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * However, this API supports additional normalization forms for specialized purposes. 4650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For example, NFKC_Casefold is provided via getInstance("nfkc_cf", COMPOSE) 4750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and can be used in implementations of UTS #46. 4850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 4950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Not only are the standard compose and decompose modes supplied, 5050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * but additional modes are provided as documented in the Mode enum. 5150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 5250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Some of the functions in this class identify normalization boundaries. 5350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * At a normalization boundary, the portions of the string 5450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * before it and starting from it do not interact and can be handled independently. 5550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 5650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The spanQuickCheckYes() stops at a normalization boundary. 5750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * When the goal is a normalized string, then the text before the boundary 5850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * can be copied, and the remainder can be processed with normalizeSecondAndAppend(). 5950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 6050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether 6150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * a character is guaranteed to be at a normalization boundary, 6250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * regardless of context. 6350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * This is used for moving from one normalization boundary to the next 6450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * or preceding boundary, and for performing iterative normalization. 6550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 6650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Iterative normalization is useful when only a small portion of a 6750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * longer string needs to be processed. 6850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For example, in ICU, iterative normalization is used by the NormalizationTransliterator 6950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart() 7050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (to process only the substring for which sort key bytes are computed). 7150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 7250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The set of normalization boundaries returned by these functions may not be 7350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * complete: There may be more boundaries that could be returned. 7450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Different functions may return different boundaries. 7550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 7650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 7750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoclass U_COMMON_API Normalizer2 : public UObject { 7850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehopublic: 7950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 8050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Returns a Normalizer2 instance which uses the specified data file 8150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) 8250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and which composes or decomposes text according to the specified mode. 8350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Returns an unmodifiable singleton instance. Do not delete it. 8450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 8550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Use packageName=NULL for data files that are part of ICU's own data. 8650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD. 8750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD. 8850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold. 8950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 9050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param packageName NULL for ICU built-in data, otherwise application data package name 9150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file 9250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param mode normalization mode (compose or decompose etc.) 9350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 9450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 9550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 9650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 9750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return the requested Normalizer2, if successful 9850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 9950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 10050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho static const Normalizer2 * 10150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho getInstance(const char *packageName, 10250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const char *name, 10350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UNormalization2Mode mode, 10450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode); 10550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 10650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 10750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Returns the normalized form of the source string. 10850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param src source string 10950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 11050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 11150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 11250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 11350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return normalized src 11450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 11550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 11650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString 11750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normalize(const UnicodeString &src, UErrorCode &errorCode) const { 11850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString result; 11950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normalize(src, result, errorCode); 12050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return result; 12150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 12250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 12350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Writes the normalized form of the source string to the destination string 12450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (replacing its contents) and returns the destination string. 12550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The source and destination strings must be different objects. 12650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param src source string 12750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param dest destination string; its contents is replaced with normalized src 12850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 12950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 13050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 13150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 13250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return dest 13350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 13450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 13550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UnicodeString & 13650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normalize(const UnicodeString &src, 13750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString &dest, 13850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode) const = 0; 13950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 14050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Appends the normalized form of the second string to the first string 14150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (merging them at the boundary) and returns the first string. 14250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The result is normalized if the first string was normalized. 14350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The first and second strings must be different objects. 14450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param first string, should be normalized 14550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param second string, will be normalized 14650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 14750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 14850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 14950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 15050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return first 15150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 15250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 15350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UnicodeString & 15450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normalizeSecondAndAppend(UnicodeString &first, 15550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeString &second, 15650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode) const = 0; 15750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 15850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Appends the second string to the first string 15950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (merging them at the boundary) and returns the first string. 16050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The result is normalized if both the strings were normalized. 16150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The first and second strings must be different objects. 16250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param first string, should be normalized 16350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param second string, should be normalized 16450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 16550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 16650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 16750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 16850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return first 16950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 17050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 17150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UnicodeString & 17250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho append(UnicodeString &first, 17350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeString &second, 17450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode) const = 0; 17550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 17650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 17750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the string is normalized. 17850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Internally, in cases where the quickCheck() method would return "maybe" 17950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (which is only possible for the two COMPOSE modes) this method 18050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * resolves to "yes" or "no" to provide a definitive result, 18150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * at the cost of doing more work in those cases. 18250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param s input string 18350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 18450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 18550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 18650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 18750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return TRUE if s is normalized 18850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 18950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 19050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UBool 19150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0; 19250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 19350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 19450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the string is normalized. 19550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For the two COMPOSE modes, the result could be "maybe" in cases that 19650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * would take a little more work to resolve definitively. 19750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster 19850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * combination of quick check + normalization, to avoid 19950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * re-checking the "yes" prefix. 20050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param s input string 20150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 20250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 20350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 20450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 20550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return UNormalizationCheckResult 20650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 20750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 20850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UNormalizationCheckResult 20950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0; 21050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 21150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 21250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Returns the end of the normalized substring of the input string. 21350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * In other words, with <code>end=spanQuickCheckYes(s, ec);</code> 21450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * the substring <code>UnicodeString(s, 0, end)</code> 21550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * will pass the quick check with a "yes" result. 21650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 21750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The returned end index is usually one or more characters before the 21850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * "no" or "maybe" character: The end index is at a normalization boundary. 21950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (See the class documentation for more about normalization boundaries.) 22050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 22150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * When the goal is a normalized string and most input strings are expected 22250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * to be normalized already, then call this method, 22350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and if it returns a prefix shorter than the input string, 22450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * copy that prefix and use normalizeSecondAndAppend() for the remainder. 22550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param s input string 22650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 22750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 22850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 22950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 23050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return "yes" span end index 23150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 23250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 23350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual int32_t 23450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0; 23550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 23650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 23750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the character always has a normalization boundary before it, 23850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * regardless of context. 23950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * If true, then the character does not normalization-interact with 24050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * preceding characters. 24150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * In other words, a string containing this character can be normalized 24250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * by processing portions before this character and starting from this 24350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * character independently. 24450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * This is used for iterative normalization. See the class documentation for details. 24550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param c character to test 24650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return TRUE if c has a normalization boundary before it 24750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 24850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 24950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UBool hasBoundaryBefore(UChar32 c) const = 0; 25050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 25150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 25250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the character always has a normalization boundary after it, 25350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * regardless of context. 25450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * If true, then the character does not normalization-interact with 25550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * following characters. 25650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * In other words, a string containing this character can be normalized 25750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * by processing portions up to this character and after this 25850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * character independently. 25950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * This is used for iterative normalization. See the class documentation for details. 26050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Note that this operation may be significantly slower than hasBoundaryBefore(). 26150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param c character to test 26250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return TRUE if c has a normalization boundary after it 26350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 26450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 26550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UBool hasBoundaryAfter(UChar32 c) const = 0; 26650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 26750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 26850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the character is normalization-inert. 26950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * If true, then the character does not change, nor normalization-interact with 27050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * preceding or following characters. 27150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * In other words, a string containing this character can be normalized 27250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * by processing portions before this character and after this 27350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * character independently. 27450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * This is used for iterative normalization. See the class documentation for details. 27550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Note that this operation may be significantly slower than hasBoundaryBefore(). 27650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param c character to test 27750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return TRUE if c is normalization-inert 27850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 27950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 28050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UBool isInert(UChar32 c) const = 0; 28150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 28250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 28350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * ICU "poor man's RTTI", returns a UClassID for this class. 28450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @returns a UClassID for this class. 28550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 28650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 28750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho static UClassID U_EXPORT2 getStaticClassID(); 28850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 28950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 29050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * ICU "poor man's RTTI", returns a UClassID for the actual class. 29150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return a UClassID for the actual class. 29250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 29350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UClassID getDynamicClassID() const = 0; 29550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}; 29650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 29750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/** 29850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Normalization filtered by a UnicodeSet. 29950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Normalizes portions of the text contained in the filter set and leaves 30050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * portions not contained in the filter set unchanged. 30150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE). 30250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Not-in-the-filter text is treated as "is normalized" and "quick check yes". 30350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * This class implements all of (and only) the Normalizer2 API. 30450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * An instance of this class is unmodifiable/immutable but is constructed and 30550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * must be destructed by the owner. 30650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 30750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 30850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoclass U_COMMON_API FilteredNormalizer2 : public Normalizer2 { 30950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehopublic: 31050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 31150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Constructs a filtered normalizer wrapping any Normalizer2 instance 31250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and a filter set. 31350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Both are aliased and must not be modified or deleted while this object 31450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * is used. 31550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The filter set should be frozen; otherwise the performance will suffer greatly. 31650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param n2 wrapped Normalizer2 instance 31750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param filterSet UnicodeSet which determines the characters to be normalized 31850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 31950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 32050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) : 32150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm2(n2), set(filterSet) {} 32250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 32350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 32450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Writes the normalized form of the source string to the destination string 32550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (replacing its contents) and returns the destination string. 32650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The source and destination strings must be different objects. 32750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param src source string 32850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param dest destination string; its contents is replaced with normalized src 32950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 33050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 33150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 33250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 33350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return dest 33450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 33550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 33650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UnicodeString & 33750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normalize(const UnicodeString &src, 33850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString &dest, 33950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode) const; 34050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 34150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Appends the normalized form of the second string to the first string 34250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (merging them at the boundary) and returns the first string. 34350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The result is normalized if the first string was normalized. 34450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The first and second strings must be different objects. 34550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param first string, should be normalized 34650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param second string, will be normalized 34750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 34850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 34950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 35050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 35150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return first 35250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 35350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 35450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UnicodeString & 35550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normalizeSecondAndAppend(UnicodeString &first, 35650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeString &second, 35750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode) const; 35850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 35950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Appends the second string to the first string 36050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (merging them at the boundary) and returns the first string. 36150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The result is normalized if both the strings were normalized. 36250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The first and second strings must be different objects. 36350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param first string, should be normalized 36450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param second string, should be normalized 36550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 36650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 36750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 36850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 36950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return first 37050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 37150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 37250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UnicodeString & 37350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho append(UnicodeString &first, 37450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeString &second, 37550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode) const; 37650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 37750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 37850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the string is normalized. 37950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details see the Normalizer2 base class documentation. 38050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param s input string 38150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 38250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 38350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 38450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 38550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return TRUE if s is normalized 38650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 38750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 38850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UBool 38950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isNormalized(const UnicodeString &s, UErrorCode &errorCode) const; 39050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 39150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the string is normalized. 39250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details see the Normalizer2 base class documentation. 39350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param s input string 39450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 39550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 39650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 39750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 39850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return UNormalizationCheckResult 39950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 40050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 40150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UNormalizationCheckResult 40250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho quickCheck(const UnicodeString &s, UErrorCode &errorCode) const; 40350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 40450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Returns the end of the normalized substring of the input string. 40550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details see the Normalizer2 base class documentation. 40650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param s input string 40750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 40850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 40950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 41050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 41150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return "yes" span end index 41250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 41350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 41450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual int32_t 41550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const; 41650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 41750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 41850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the character always has a normalization boundary before it, 41950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * regardless of context. 42050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details see the Normalizer2 base class documentation. 42150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param c character to test 42250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return TRUE if c has a normalization boundary before it 42350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 42450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 42550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UBool hasBoundaryBefore(UChar32 c) const; 42650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 42750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 42850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the character always has a normalization boundary after it, 42950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * regardless of context. 43050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details see the Normalizer2 base class documentation. 43150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param c character to test 43250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return TRUE if c has a normalization boundary after it 43350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 43450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 43550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UBool hasBoundaryAfter(UChar32 c) const; 43650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 43750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 43850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the character is normalization-inert. 43950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details see the Normalizer2 base class documentation. 44050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param c character to test 44150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return TRUE if c is normalization-inert 44250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 44350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 44450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UBool isInert(UChar32 c) const; 44550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 44650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 44750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * ICU "poor man's RTTI", returns a UClassID for this class. 44850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @returns a UClassID for this class. 44950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 45050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 45150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho static UClassID U_EXPORT2 getStaticClassID(); 45250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 45350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 45450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * ICU "poor man's RTTI", returns a UClassID for the actual class. 45550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return a UClassID for the actual class. 45650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4 45750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 45850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UClassID getDynamicClassID() const; 45950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoprivate: 46050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString & 46150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normalize(const UnicodeString &src, 46250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString &dest, 46350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho USetSpanCondition spanCondition, 46450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode) const; 46550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 46650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString & 46750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normalizeSecondAndAppend(UnicodeString &first, 46850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeString &second, 46950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool doNormalize, 47050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode) const; 47150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 47250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const Normalizer2 &norm2; 47350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeSet &set; 47450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}; 47550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 47650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_END 47750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 47850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif // !UCONFIG_NO_NORMALIZATION 47950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif // __NORMALIZER2_H__ 480