normalizer2.h revision b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2
150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/* 250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho******************************************************************************* 350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* 4b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* Copyright (C) 2009-2011, International Business Machines 550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* Corporation and others. All Rights Reserved. 650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* 750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho******************************************************************************* 850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* file name: normalizer2.h 950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* encoding: US-ASCII 1050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* tab size: 8 (not used) 1150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* indentation:4 1250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* 1350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* created on: 2009nov22 1450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* created by: Markus W. Scherer 1550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*/ 1650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#ifndef __NORMALIZER2_H__ 1850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#define __NORMALIZER2_H__ 1950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/** 2150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * \file 2250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * \brief C++ API: New API for Unicode Normalization. 2350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 2450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/utypes.h" 2650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_NORMALIZATION 2850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/uniset.h" 3050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/unistr.h" 3150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/unorm2.h" 3250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 3350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_BEGIN 3450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 3550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/** 3650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Unicode normalization functionality for standard Unicode normalization or 3750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * for using custom mapping tables. 3850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * All instances of this class are unmodifiable/immutable. 3950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Instances returned by getInstance() are singletons that must not be deleted by the caller. 4027f654740f2a26ad62a5c155af9199af9e69b889claireho * The Normalizer2 class is not intended for public subclassing. 4150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 4250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The primary functions are to produce a normalized string and to detect whether 4350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * a string is already normalized. 4450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The most commonly used normalization forms are those defined in 4550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * http://www.unicode.org/unicode/reports/tr15/ 4650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * However, this API supports additional normalization forms for specialized purposes. 4750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For example, NFKC_Casefold is provided via getInstance("nfkc_cf", COMPOSE) 4850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and can be used in implementations of UTS #46. 4950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 5050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Not only are the standard compose and decompose modes supplied, 5150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * but additional modes are provided as documented in the Mode enum. 5250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 5350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Some of the functions in this class identify normalization boundaries. 5450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * At a normalization boundary, the portions of the string 5550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * before it and starting from it do not interact and can be handled independently. 5650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 5750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The spanQuickCheckYes() stops at a normalization boundary. 5850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * When the goal is a normalized string, then the text before the boundary 5950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * can be copied, and the remainder can be processed with normalizeSecondAndAppend(). 6050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 6150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether 6250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * a character is guaranteed to be at a normalization boundary, 6350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * regardless of context. 6450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * This is used for moving from one normalization boundary to the next 6550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * or preceding boundary, and for performing iterative normalization. 6650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 6750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Iterative normalization is useful when only a small portion of a 6850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * longer string needs to be processed. 6950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For example, in ICU, iterative normalization is used by the NormalizationTransliterator 7050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart() 7150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (to process only the substring for which sort key bytes are computed). 7250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 7350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The set of normalization boundaries returned by these functions may not be 7450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * complete: There may be more boundaries that could be returned. 7550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Different functions may return different boundaries. 7627f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 7750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 7850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoclass U_COMMON_API Normalizer2 : public UObject { 7950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehopublic: 8050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 8150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Returns a Normalizer2 instance which uses the specified data file 8250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) 8350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and which composes or decomposes text according to the specified mode. 8450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Returns an unmodifiable singleton instance. Do not delete it. 8550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 8650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Use packageName=NULL for data files that are part of ICU's own data. 8750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD. 8850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD. 8950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold. 9050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 9150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param packageName NULL for ICU built-in data, otherwise application data package name 9250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file 9350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param mode normalization mode (compose or decompose etc.) 9450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 9550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 9650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 9750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 9850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return the requested Normalizer2, if successful 9927f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 10050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 10150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho static const Normalizer2 * 10250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho getInstance(const char *packageName, 10350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const char *name, 10450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UNormalization2Mode mode, 10550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode); 10650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 10750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 10850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Returns the normalized form of the source string. 10950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param src source string 11050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 11150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 11250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 11350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 11450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return normalized src 11527f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 11650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 11750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString 11850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normalize(const UnicodeString &src, UErrorCode &errorCode) const { 11950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString result; 12050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normalize(src, result, errorCode); 12150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return result; 12250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 12350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 12450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Writes the normalized form of the source string to the destination string 12550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (replacing its contents) and returns the destination string. 12650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The source and destination strings must be different objects. 12750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param src source string 12850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param dest destination string; its contents is replaced with normalized src 12950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 13050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 13150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 13250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 13350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return dest 13427f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 13550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 13650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UnicodeString & 13750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normalize(const UnicodeString &src, 13850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString &dest, 13950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode) const = 0; 14050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 14150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Appends the normalized form of the second string to the first string 14250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (merging them at the boundary) and returns the first string. 14350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The result is normalized if the first string was normalized. 14450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The first and second strings must be different objects. 14550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param first string, should be normalized 14650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param second string, will be normalized 14750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 14850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 14950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 15050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 15150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return first 15227f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 15350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 15450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UnicodeString & 15550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normalizeSecondAndAppend(UnicodeString &first, 15650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeString &second, 15750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode) const = 0; 15850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 15950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Appends the second string to the first string 16050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (merging them at the boundary) and returns the first string. 16150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The result is normalized if both the strings were normalized. 16250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The first and second strings must be different objects. 16350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param first string, should be normalized 16450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param second string, should be normalized 16550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 16650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 16750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 16850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 16950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return first 17027f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 17150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 17250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UnicodeString & 17350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho append(UnicodeString &first, 17450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeString &second, 17550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode) const = 0; 17650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 17750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 178b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Gets the decomposition mapping of c. 179b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Roughly equivalent to normalizing the String form of c 180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster, and except that this function 181b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * returns FALSE and does not write a string 182b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * if c does not have a decomposition mapping in this instance's data. 18327f654740f2a26ad62a5c155af9199af9e69b889claireho * This function is independent of the mode of the Normalizer2. 18427f654740f2a26ad62a5c155af9199af9e69b889claireho * @param c code point 18527f654740f2a26ad62a5c155af9199af9e69b889claireho * @param decomposition String object which will be set to c's 18627f654740f2a26ad62a5c155af9199af9e69b889claireho * decomposition mapping, if there is one. 18727f654740f2a26ad62a5c155af9199af9e69b889claireho * @return TRUE if c has a decomposition, otherwise FALSE 18827f654740f2a26ad62a5c155af9199af9e69b889claireho * @draft ICU 4.6 18927f654740f2a26ad62a5c155af9199af9e69b889claireho */ 19027f654740f2a26ad62a5c155af9199af9e69b889claireho virtual UBool 19127f654740f2a26ad62a5c155af9199af9e69b889claireho getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0; 19227f654740f2a26ad62a5c155af9199af9e69b889claireho 19327f654740f2a26ad62a5c155af9199af9e69b889claireho /** 19450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the string is normalized. 19550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Internally, in cases where the quickCheck() method would return "maybe" 19650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (which is only possible for the two COMPOSE modes) this method 19750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * resolves to "yes" or "no" to provide a definitive result, 19850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * at the cost of doing more work in those cases. 19950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param s input string 20050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 20150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 20250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 20350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 20450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return TRUE if s is normalized 20527f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 20650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 20750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UBool 20850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0; 20950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 21050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 21150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the string is normalized. 21250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For the two COMPOSE modes, the result could be "maybe" in cases that 21350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * would take a little more work to resolve definitively. 21450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster 21550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * combination of quick check + normalization, to avoid 21650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * re-checking the "yes" prefix. 21750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param s input string 21850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 21950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 22050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 22150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 22250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return UNormalizationCheckResult 22327f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 22450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 22550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UNormalizationCheckResult 22650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0; 22750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 22850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 22950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Returns the end of the normalized substring of the input string. 23050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * In other words, with <code>end=spanQuickCheckYes(s, ec);</code> 23150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * the substring <code>UnicodeString(s, 0, end)</code> 23250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * will pass the quick check with a "yes" result. 23350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 23450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The returned end index is usually one or more characters before the 23550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * "no" or "maybe" character: The end index is at a normalization boundary. 23650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (See the class documentation for more about normalization boundaries.) 23750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 23850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * When the goal is a normalized string and most input strings are expected 23950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * to be normalized already, then call this method, 24050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and if it returns a prefix shorter than the input string, 24150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * copy that prefix and use normalizeSecondAndAppend() for the remainder. 24250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param s input string 24350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 24450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 24550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 24650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 24750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return "yes" span end index 24827f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 24950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 25050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual int32_t 25150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0; 25250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 25350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 25450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the character always has a normalization boundary before it, 25550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * regardless of context. 25650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * If true, then the character does not normalization-interact with 25750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * preceding characters. 25850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * In other words, a string containing this character can be normalized 25950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * by processing portions before this character and starting from this 26050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * character independently. 26150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * This is used for iterative normalization. See the class documentation for details. 26250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param c character to test 26350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return TRUE if c has a normalization boundary before it 26427f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 26550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 26650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UBool hasBoundaryBefore(UChar32 c) const = 0; 26750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 26850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 26950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the character always has a normalization boundary after it, 27050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * regardless of context. 27150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * If true, then the character does not normalization-interact with 27250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * following characters. 27350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * In other words, a string containing this character can be normalized 27450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * by processing portions up to this character and after this 27550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * character independently. 27650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * This is used for iterative normalization. See the class documentation for details. 27750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Note that this operation may be significantly slower than hasBoundaryBefore(). 27850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param c character to test 27950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return TRUE if c has a normalization boundary after it 28027f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 28150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 28250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UBool hasBoundaryAfter(UChar32 c) const = 0; 28350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 28450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 28550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the character is normalization-inert. 28650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * If true, then the character does not change, nor normalization-interact with 28750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * preceding or following characters. 28850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * In other words, a string containing this character can be normalized 28950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * by processing portions before this character and after this 29050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * character independently. 29150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * This is used for iterative normalization. See the class documentation for details. 29250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Note that this operation may be significantly slower than hasBoundaryBefore(). 29350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param c character to test 29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return TRUE if c is normalization-inert 29527f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 29650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 29750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UBool isInert(UChar32 c) const = 0; 29850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 29927f654740f2a26ad62a5c155af9199af9e69b889clairehoprivate: 30027f654740f2a26ad62a5c155af9199af9e69b889claireho // No ICU "poor man's RTTI" for this class nor its subclasses. 30127f654740f2a26ad62a5c155af9199af9e69b889claireho virtual UClassID getDynamicClassID() const; 30250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}; 30350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 30450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/** 30550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Normalization filtered by a UnicodeSet. 30650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Normalizes portions of the text contained in the filter set and leaves 30750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * portions not contained in the filter set unchanged. 30850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE). 30950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Not-in-the-filter text is treated as "is normalized" and "quick check yes". 31050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * This class implements all of (and only) the Normalizer2 API. 31150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * An instance of this class is unmodifiable/immutable but is constructed and 31250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * must be destructed by the owner. 31327f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 31450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 31550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoclass U_COMMON_API FilteredNormalizer2 : public Normalizer2 { 31650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehopublic: 31750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 31850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Constructs a filtered normalizer wrapping any Normalizer2 instance 31950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and a filter set. 32050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Both are aliased and must not be modified or deleted while this object 32150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * is used. 32250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The filter set should be frozen; otherwise the performance will suffer greatly. 32350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param n2 wrapped Normalizer2 instance 32450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param filterSet UnicodeSet which determines the characters to be normalized 32527f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 32650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 32750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) : 32850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho norm2(n2), set(filterSet) {} 32950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 33050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 33150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Writes the normalized form of the source string to the destination string 33250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (replacing its contents) and returns the destination string. 33350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The source and destination strings must be different objects. 33450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param src source string 33550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param dest destination string; its contents is replaced with normalized src 33650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 33750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 33850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 33950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 34050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return dest 34127f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 34250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 34350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UnicodeString & 34450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normalize(const UnicodeString &src, 34550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString &dest, 34650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode) const; 34750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 34850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Appends the normalized form of the second string to the first string 34950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (merging them at the boundary) and returns the first string. 35050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The result is normalized if the first string was normalized. 35150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The first and second strings must be different objects. 35250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param first string, should be normalized 35350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param second string, will be normalized 35450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 35550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 35650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 35750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 35850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return first 35927f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 36050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 36150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UnicodeString & 36250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normalizeSecondAndAppend(UnicodeString &first, 36350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeString &second, 36450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode) const; 36550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 36650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Appends the second string to the first string 36750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (merging them at the boundary) and returns the first string. 36850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The result is normalized if both the strings were normalized. 36950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The first and second strings must be different objects. 37050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param first string, should be normalized 37150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param second string, should be normalized 37250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 37350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 37450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 37550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 37650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return first 37727f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 37850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 37950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UnicodeString & 38050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho append(UnicodeString &first, 38150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeString &second, 38250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode) const; 38350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 38450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 38527f654740f2a26ad62a5c155af9199af9e69b889claireho * Gets the decomposition mapping of c. Equivalent to normalize(UnicodeString(c)) 38627f654740f2a26ad62a5c155af9199af9e69b889claireho * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster. 38727f654740f2a26ad62a5c155af9199af9e69b889claireho * This function is independent of the mode of the Normalizer2. 38827f654740f2a26ad62a5c155af9199af9e69b889claireho * @param c code point 38927f654740f2a26ad62a5c155af9199af9e69b889claireho * @param decomposition String object which will be set to c's 39027f654740f2a26ad62a5c155af9199af9e69b889claireho * decomposition mapping, if there is one. 39127f654740f2a26ad62a5c155af9199af9e69b889claireho * @return TRUE if c has a decomposition, otherwise FALSE 39227f654740f2a26ad62a5c155af9199af9e69b889claireho * @draft ICU 4.6 39327f654740f2a26ad62a5c155af9199af9e69b889claireho */ 39427f654740f2a26ad62a5c155af9199af9e69b889claireho virtual UBool 39527f654740f2a26ad62a5c155af9199af9e69b889claireho getDecomposition(UChar32 c, UnicodeString &decomposition) const; 39627f654740f2a26ad62a5c155af9199af9e69b889claireho 39727f654740f2a26ad62a5c155af9199af9e69b889claireho /** 39850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the string is normalized. 39950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details see the Normalizer2 base class documentation. 40050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param s input string 40150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 40250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 40350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 40450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 40550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return TRUE if s is normalized 40627f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 40750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 40850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UBool 40950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isNormalized(const UnicodeString &s, UErrorCode &errorCode) const; 41050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 41150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the string is normalized. 41250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details see the Normalizer2 base class documentation. 41350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param s input string 41450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 41550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 41650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 41750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 41850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return UNormalizationCheckResult 41927f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 42050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 42150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UNormalizationCheckResult 42250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho quickCheck(const UnicodeString &s, UErrorCode &errorCode) const; 42350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 42450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Returns the end of the normalized substring of the input string. 42550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details see the Normalizer2 base class documentation. 42650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param s input string 42750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param errorCode Standard ICU error code. Its input value must 42850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 42950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 43050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 43150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return "yes" span end index 43227f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 43350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 43450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual int32_t 43550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const; 43650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 43750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 43850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the character always has a normalization boundary before it, 43950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * regardless of context. 44050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details see the Normalizer2 base class documentation. 44150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param c character to test 44250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return TRUE if c has a normalization boundary before it 44327f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 44450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 44550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UBool hasBoundaryBefore(UChar32 c) const; 44650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 44750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 44850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the character always has a normalization boundary after it, 44950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * regardless of context. 45050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details see the Normalizer2 base class documentation. 45150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param c character to test 45250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return TRUE if c has a normalization boundary after it 45327f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 45450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 45550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UBool hasBoundaryAfter(UChar32 c) const; 45650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 45750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 45850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the character is normalization-inert. 45950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details see the Normalizer2 base class documentation. 46050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param c character to test 46150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return TRUE if c is normalization-inert 46227f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 46350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 46450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho virtual UBool isInert(UChar32 c) const; 46550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoprivate: 46650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString & 46750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normalize(const UnicodeString &src, 46850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString &dest, 46950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho USetSpanCondition spanCondition, 47050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode) const; 47150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 47250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString & 47350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normalizeSecondAndAppend(UnicodeString &first, 47450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeString &second, 47550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool doNormalize, 47650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &errorCode) const; 47750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 47850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const Normalizer2 &norm2; 47950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UnicodeSet &set; 48050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}; 48150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 48250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_END 48350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 48450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif // !UCONFIG_NO_NORMALIZATION 48550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif // __NORMALIZER2_H__ 486