normalizer2.h revision 50294ead5e5d23f5bbfed76e00e6b510bd41eee1
/*
*******************************************************************************
*
*   Copyright (C) 2009-2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  normalizer2.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2009nov22
*   created by: Markus W. Scherer
*/
1650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
1750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#ifndef __NORMALIZER2_H__
1850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#define __NORMALIZER2_H__
1950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
/**
 * \file
 * \brief C++ API: New API for Unicode Normalization.
 */
2450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/utypes.h"
2650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_NORMALIZATION
2850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/uniset.h"
3050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/unistr.h"
3150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/unorm2.h"
3250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
3350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_BEGIN
3450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
3550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
3650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Unicode normalization functionality for standard Unicode normalization or
3750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * for using custom mapping tables.
3850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * All instances of this class are unmodifiable/immutable.
3950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Instances returned by getInstance() are singletons that must not be deleted by the caller.
4050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
4150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The primary functions are to produce a normalized string and to detect whether
4250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * a string is already normalized.
4350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The most commonly used normalization forms are those defined in
4450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * http://www.unicode.org/unicode/reports/tr15/
4550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * However, this API supports additional normalization forms for specialized purposes.
4650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For example, NFKC_Casefold is provided via getInstance("nfkc_cf", COMPOSE)
4750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and can be used in implementations of UTS #46.
4850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
4950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Not only are the standard compose and decompose modes supplied,
5050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * but additional modes are provided as documented in the Mode enum.
5150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
5250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Some of the functions in this class identify normalization boundaries.
5350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * At a normalization boundary, the portions of the string
5450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * before it and starting from it do not interact and can be handled independently.
5550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
5650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The spanQuickCheckYes() stops at a normalization boundary.
5750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * When the goal is a normalized string, then the text before the boundary
5850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * can be copied, and the remainder can be processed with normalizeSecondAndAppend().
5950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
6050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether
6150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * a character is guaranteed to be at a normalization boundary,
6250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * regardless of context.
6350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * This is used for moving from one normalization boundary to the next
6450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * or preceding boundary, and for performing iterative normalization.
6550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
6650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Iterative normalization is useful when only a small portion of a
6750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * longer string needs to be processed.
6850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For example, in ICU, iterative normalization is used by the NormalizationTransliterator
6950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart()
7050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (to process only the substring for which sort key bytes are computed).
7150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
7250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The set of normalization boundaries returned by these functions may not be
7350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * complete: There may be more boundaries that could be returned.
7450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Different functions may return different boundaries.
7550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @draft ICU 4.4
7650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
7750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoclass U_COMMON_API Normalizer2 : public UObject {
7850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehopublic:
7950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
8050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Returns a Normalizer2 instance which uses the specified data file
8150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
8250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * and which composes or decomposes text according to the specified mode.
8350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Returns an unmodifiable singleton instance. Do not delete it.
8450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *
8550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Use packageName=NULL for data files that are part of ICU's own data.
8650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
8750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
8850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
8950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *
9050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param packageName NULL for ICU built-in data, otherwise application data package name
9150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
9250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param mode normalization mode (compose or decompose etc.)
9350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
9450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
9550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
9650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
9750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return the requested Normalizer2, if successful
9850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @draft ICU 4.4
9950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
10050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    static const Normalizer2 *
10150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    getInstance(const char *packageName,
10250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                const char *name,
10350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UNormalization2Mode mode,
10450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UErrorCode &errorCode);
10550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
10650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
10750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Returns the normalized form of the source string.
10850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param src source string
10950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
11050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
11150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
11250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
11350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return normalized src
11450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @draft ICU 4.4
11550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
11650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString
11750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    normalize(const UnicodeString &src, UErrorCode &errorCode) const {
11850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString result;
11950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        normalize(src, result, errorCode);
12050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return result;
12150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
12250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
12350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Writes the normalized form of the source string to the destination string
12450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * (replacing its contents) and returns the destination string.
12550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The source and destination strings must be different objects.
12650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param src source string
12750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param dest destination string; its contents is replaced with normalized src
12850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
12950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
13050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
13150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
13250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return dest
13350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @draft ICU 4.4
13450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
13550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UnicodeString &
13650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    normalize(const UnicodeString &src,
13750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho              UnicodeString &dest,
13850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho              UErrorCode &errorCode) const = 0;
13950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
14050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Appends the normalized form of the second string to the first string
14150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * (merging them at the boundary) and returns the first string.
14250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The result is normalized if the first string was normalized.
14350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The first and second strings must be different objects.
14450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param first string, should be normalized
14550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param second string, will be normalized
14650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
14750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
14850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
14950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
15050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return first
15150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @draft ICU 4.4
15250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
15350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UnicodeString &
15450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    normalizeSecondAndAppend(UnicodeString &first,
15550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             const UnicodeString &second,
15650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             UErrorCode &errorCode) const = 0;
15750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
15850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Appends the second string to the first string
15950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * (merging them at the boundary) and returns the first string.
16050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The result is normalized if both the strings were normalized.
16150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The first and second strings must be different objects.
16250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param first string, should be normalized
16350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param second string, should be normalized
16450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
16550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
16650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
16750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
16850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return first
16950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @draft ICU 4.4
17050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
17150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UnicodeString &
17250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    append(UnicodeString &first,
17350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho           const UnicodeString &second,
17450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho           UErrorCode &errorCode) const = 0;
17550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
17650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
17750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Tests if the string is normalized.
17850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Internally, in cases where the quickCheck() method would return "maybe"
17950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * (which is only possible for the two COMPOSE modes) this method
18050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * resolves to "yes" or "no" to provide a definitive result,
18150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * at the cost of doing more work in those cases.
18250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param s input string
18350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
18450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
18550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
18650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
18750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return TRUE if s is normalized
18850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @draft ICU 4.4
18950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
19050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UBool
19150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
19250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
19350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
19450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Tests if the string is normalized.
19550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * For the two COMPOSE modes, the result could be "maybe" in cases that
19650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * would take a little more work to resolve definitively.
19750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster
19850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * combination of quick check + normalization, to avoid
19950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * re-checking the "yes" prefix.
20050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param s input string
20150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
20250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
20350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
20450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
20550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return UNormalizationCheckResult
20650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @draft ICU 4.4
20750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
20850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UNormalizationCheckResult
20950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
21050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
21150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
21250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Returns the end of the normalized substring of the input string.
21350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * In other words, with <code>end=spanQuickCheckYes(s, ec);</code>
21450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * the substring <code>UnicodeString(s, 0, end)</code>
21550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * will pass the quick check with a "yes" result.
21650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *
21750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The returned end index is usually one or more characters before the
21850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * "no" or "maybe" character: The end index is at a normalization boundary.
21950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * (See the class documentation for more about normalization boundaries.)
22050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *
22150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * When the goal is a normalized string and most input strings are expected
22250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * to be normalized already, then call this method,
22350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * and if it returns a prefix shorter than the input string,
22450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * copy that prefix and use normalizeSecondAndAppend() for the remainder.
22550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param s input string
22650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
22750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
22850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
22950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
23050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return "yes" span end index
23150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @draft ICU 4.4
23250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
23350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual int32_t
23450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
23550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
23650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
23750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Tests if the character always has a normalization boundary before it,
23850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * regardless of context.
23950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * If true, then the character does not normalization-interact with
24050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * preceding characters.
24150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * In other words, a string containing this character can be normalized
24250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * by processing portions before this character and starting from this
24350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * character independently.
24450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * This is used for iterative normalization. See the class documentation for details.
24550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param c character to test
24650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return TRUE if c has a normalization boundary before it
24750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @draft ICU 4.4
24850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
24950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
25050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
25150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
25250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Tests if the character always has a normalization boundary after it,
25350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * regardless of context.
25450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * If true, then the character does not normalization-interact with
25550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * following characters.
25650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * In other words, a string containing this character can be normalized
25750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * by processing portions up to this character and after this
25850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * character independently.
25950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * This is used for iterative normalization. See the class documentation for details.
26050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Note that this operation may be significantly slower than hasBoundaryBefore().
26150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param c character to test
26250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return TRUE if c has a normalization boundary after it
26350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @draft ICU 4.4
26450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
26550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
26650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
26750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
26850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Tests if the character is normalization-inert.
26950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * If true, then the character does not change, nor normalization-interact with
27050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * preceding or following characters.
27150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * In other words, a string containing this character can be normalized
27250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * by processing portions before this character and after this
27350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * character independently.
27450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * This is used for iterative normalization. See the class documentation for details.
27550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Note that this operation may be significantly slower than hasBoundaryBefore().
27650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param c character to test
27750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return TRUE if c is normalization-inert
27850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @draft ICU 4.4
27950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
28050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UBool isInert(UChar32 c) const = 0;
28150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
28250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
28350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * ICU "poor man's RTTI", returns a UClassID for this class.
28450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @returns a UClassID for this class.
28550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @draft ICU 4.4
28650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
28750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    static UClassID U_EXPORT2 getStaticClassID();
28850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
28950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
29050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * ICU "poor man's RTTI", returns a UClassID for the actual class.
29150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return a UClassID for the actual class.
29250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @draft ICU 4.4
29350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UClassID getDynamicClassID() const = 0;
29550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho};
29650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
/**
 * Normalization filtered by a UnicodeSet.
 * Normalizes portions of the text contained in the filter set and leaves
 * portions not contained in the filter set unchanged.
 * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE).
 * Not-in-the-filter text is treated as "is normalized" and "quick check yes".
 * This class implements all of (and only) the Normalizer2 API.
 * An instance of this class is unmodifiable/immutable but is constructed and
 * must be destructed by the owner.
 * @draft ICU 4.4
 */
30850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoclass U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
30950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehopublic:
31050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
31150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Constructs a filtered normalizer wrapping any Normalizer2 instance
31250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * and a filter set.
31350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Both are aliased and must not be modified or deleted while this object
31450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * is used.
31550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The filter set should be frozen; otherwise the performance will suffer greatly.
31650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param n2 wrapped Normalizer2 instance
31750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param filterSet UnicodeSet which determines the characters to be normalized
31850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @draft ICU 4.4
31950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
32050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
32150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            norm2(n2), set(filterSet) {}
32250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
32350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
32450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Writes the normalized form of the source string to the destination string
32550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * (replacing its contents) and returns the destination string.
32650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The source and destination strings must be different objects.
32750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param src source string
32850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param dest destination string; its contents are replaced with the normalized src
32950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
33050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
33150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
33250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
33350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return dest (the destination string that was passed in)
33450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @draft ICU 4.4
33550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
33650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UnicodeString &
33750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    normalize(const UnicodeString &src,
33850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho              UnicodeString &dest,
33950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho              UErrorCode &errorCode) const;
34050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
34150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Appends the normalized form of the second string to the first string
34250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * (merging them at the boundary) and returns the first string.
34350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The result is normalized if the first string was normalized.
34450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The first and second strings must be different objects.
34550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param first first string, should be normalized; the second string is appended to it
34650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param second second string, will be normalized before it is appended
34750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
34850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
34950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
35050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
35150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return first
35250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @draft ICU 4.4
35350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
35450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UnicodeString &
35550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    normalizeSecondAndAppend(UnicodeString &first,
35650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             const UnicodeString &second,
35750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             UErrorCode &errorCode) const;
35850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
35950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Appends the second string to the first string
36050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * (merging them at the boundary) and returns the first string.
36150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The result is normalized if both strings were normalized.
36250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The first and second strings must be different objects.
36350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param first first string, should be normalized; the second string is appended to it
36450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param second second string, should be normalized
36550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
36650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
36750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
36850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
36950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return first
37050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @draft ICU 4.4
37150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
37250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UnicodeString &
37350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    append(UnicodeString &first,
37450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho           const UnicodeString &second,
37550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho           UErrorCode &errorCode) const;
37650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
37750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
37850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Tests if the string is normalized.
37950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * For details see the Normalizer2 base class documentation.
38050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param s input string to test
38150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
38250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
38350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
38450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
38550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return TRUE if s is normalized
38650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @draft ICU 4.4
38750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
38850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UBool
38950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const;
39050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
39150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Quick-checks whether the string is normalized.
39250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * For details see the Normalizer2 base class documentation.
39350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param s input string to check
39450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
39550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
39650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
39750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
39850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return the UNormalizationCheckResult for s
39950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @draft ICU 4.4
40050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
40150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UNormalizationCheckResult
40250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const;
40350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
40450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Returns the end of the normalized substring of the input string.
40550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * For details see the Normalizer2 base class documentation.
40650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param s input string
40750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
40850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
40950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
41050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
41150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return end index in s of the "yes" (normalized) span
41250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @draft ICU 4.4
41350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
41450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual int32_t
41550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const;
41650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
41750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
41850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Tests if the character always has a normalization boundary before it,
41950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * regardless of context.
42050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * For details see the Normalizer2 base class documentation.
42150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param c character (UChar32) to test
42250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return TRUE if c always has a normalization boundary before it
42350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @draft ICU 4.4
42450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
42550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UBool hasBoundaryBefore(UChar32 c) const;
42650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
42750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
42850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Tests if the character always has a normalization boundary after it,
42950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * regardless of context.
43050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * For details see the Normalizer2 base class documentation.
43150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param c character (UChar32) to test
43250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return TRUE if c always has a normalization boundary after it
43350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @draft ICU 4.4
43450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
43550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UBool hasBoundaryAfter(UChar32 c) const;
43650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
43750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
43850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Tests if the character is normalization-inert.
43950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * For details see the Normalizer2 base class documentation.
44050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param c character (UChar32) to test
44150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return TRUE if c is normalization-inert
44250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @draft ICU 4.4
44350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
44450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UBool isInert(UChar32 c) const;
44550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
44650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
44750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * ICU "poor man's RTTI", returns a UClassID for this class.
44850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return a UClassID for this class.
44950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @draft ICU 4.4
45050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
45150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    static UClassID U_EXPORT2 getStaticClassID();
45250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
45350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
45450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * ICU "poor man's RTTI", returns a UClassID for the actual class.
45550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * This is the virtual, per-object counterpart of the static getStaticClassID().
45650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return a UClassID for the actual class.
45650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @draft ICU 4.4
45750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
45850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UClassID getDynamicClassID() const;
45950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoprivate:
46050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString &  // private, non-virtual overload of normalize() with one extra parameter
46150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    normalize(const UnicodeString &src,
46250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho              UnicodeString &dest,
46350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho              USetSpanCondition spanCondition,  // NOTE(review): presumably the span condition applied to the filter set; confirm in the implementation file
46450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho              UErrorCode &errorCode) const;
46550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
46650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString &  // private, non-virtual overload of normalizeSecondAndAppend() with one extra parameter
46750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    normalizeSecondAndAppend(UnicodeString &first,
46850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             const UnicodeString &second,
46950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             UBool doNormalize,  // NOTE(review): presumably selects normalizeSecondAndAppend() vs. append() behavior; confirm in the implementation file
47050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             UErrorCode &errorCode) const;
47150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
47250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const Normalizer2 &norm2;  // wrapped normalizer; a reference bound to the constructor's n2 argument (not a copy)
47350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UnicodeSet &set;  // filter set; a reference bound to the constructor's filterSet argument (not a copy)
47450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho};
47550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
47650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_END
47750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
47850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif  // !UCONFIG_NO_NORMALIZATION
47950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif  // __NORMALIZER2_H__
480