normalizer2.h revision b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2
150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/*
250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*******************************************************************************
350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*
4b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   Copyright (C) 2009-2011, International Business Machines
550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   Corporation and others.  All Rights Reserved.
650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*
750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*******************************************************************************
850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   file name:  normalizer2.h
950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   encoding:   US-ASCII
1050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   tab size:   8 (not used)
1150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   indentation:4
1250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*
1350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   created on: 2009nov22
1450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   created by: Markus W. Scherer
1550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*/
1650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
1750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#ifndef __NORMALIZER2_H__
1850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#define __NORMALIZER2_H__
1950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
2150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * \file
2250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * \brief C++ API: New API for Unicode Normalization.
2350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
2450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/utypes.h"
2650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_NORMALIZATION
2850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/uniset.h"
3050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/unistr.h"
3150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/unorm2.h"
3250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
3350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_BEGIN
3450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
3550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
3650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Unicode normalization functionality for standard Unicode normalization or
3750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * for using custom mapping tables.
3850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * All instances of this class are unmodifiable/immutable.
3950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Instances returned by getInstance() are singletons that must not be deleted by the caller.
4027f654740f2a26ad62a5c155af9199af9e69b889claireho * The Normalizer2 class is not intended for public subclassing.
4150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
4250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The primary functions are to produce a normalized string and to detect whether
4350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * a string is already normalized.
4450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The most commonly used normalization forms are those defined in
4550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * http://www.unicode.org/unicode/reports/tr15/
4650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * However, this API supports additional normalization forms for specialized purposes.
4750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For example, NFKC_Casefold is provided via getInstance("nfkc_cf", COMPOSE)
4850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and can be used in implementations of UTS #46.
4950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
5050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Not only are the standard compose and decompose modes supplied,
5150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * but additional modes are provided as documented in the Mode enum.
5250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
5350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Some of the functions in this class identify normalization boundaries.
5450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * At a normalization boundary, the portions of the string
5550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * before it and starting from it do not interact and can be handled independently.
5650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
5750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The spanQuickCheckYes() function stops at a normalization boundary.
5850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * When the goal is a normalized string, then the text before the boundary
5950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * can be copied, and the remainder can be processed with normalizeSecondAndAppend().
6050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
6150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether
6250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * a character is guaranteed to be at a normalization boundary,
6350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * regardless of context.
6450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * This is used for moving from one normalization boundary to the next
6550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * or preceding boundary, and for performing iterative normalization.
6650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
6750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Iterative normalization is useful when only a small portion of a
6850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * longer string needs to be processed.
6950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For example, in ICU, iterative normalization is used by the NormalizationTransliterator
7050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart()
7150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (to process only the substring for which sort key bytes are computed).
7250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
7350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The set of normalization boundaries returned by these functions may not be
7450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * complete: There may be more boundaries that could be returned.
7550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Different functions may return different boundaries.
7627f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4
7750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
7850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoclass U_COMMON_API Normalizer2 : public UObject {
7950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehopublic:
8050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
8150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Returns a Normalizer2 instance which uses the specified data file
8250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
8350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * and which composes or decomposes text according to the specified mode.
8450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Returns an unmodifiable singleton instance. Do not delete it.
8550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *
8650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Use packageName=NULL for data files that are part of ICU's own data.
8750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
8850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
8950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
9050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *
9150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param packageName NULL for ICU built-in data, otherwise application data package name
9250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
9350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param mode normalization mode (compose or decompose etc.)
9450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
9550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
9650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
9750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
9850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return the requested Normalizer2, if successful
9927f654740f2a26ad62a5c155af9199af9e69b889claireho     * @stable ICU 4.4
10050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
10150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    static const Normalizer2 *
10250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    getInstance(const char *packageName,
10350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                const char *name,
10450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UNormalization2Mode mode,
10550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                UErrorCode &errorCode);
10650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
10750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
10850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Returns the normalized form of the source string, by value, via normalize(src, dest, errorCode).
10950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param src source string
11050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
11150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
11250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
11350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
11450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return normalized src
11527f654740f2a26ad62a5c155af9199af9e69b889claireho     * @stable ICU 4.4
11650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
11750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString
11850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    normalize(const UnicodeString &src, UErrorCode &errorCode) const {
11950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UnicodeString result;
12050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        normalize(src, result, errorCode);
12150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return result;
12250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
12350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
12450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Writes the normalized form of the source string to the destination string
12550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * (replacing its contents) and returns the destination string.
12650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The source and destination strings must be different objects.
12750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param src source string
12850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param dest destination string; its contents are replaced with normalized src
12950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
13050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
13150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
13250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
13350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return dest
13427f654740f2a26ad62a5c155af9199af9e69b889claireho     * @stable ICU 4.4
13550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
13650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UnicodeString &
13750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    normalize(const UnicodeString &src,
13850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho              UnicodeString &dest,
13950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho              UErrorCode &errorCode) const = 0;
14050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
14150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Appends the normalized form of the second string to the first string
14250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * (merging them at the boundary) and returns the first string.
14350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The result is normalized if the first string was normalized.
14450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The first and second strings must be different objects.
14550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param first string, should be normalized
14650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param second string, will be normalized
14750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
14850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
14950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
15050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
15150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return first
15227f654740f2a26ad62a5c155af9199af9e69b889claireho     * @stable ICU 4.4
15350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
15450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UnicodeString &
15550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    normalizeSecondAndAppend(UnicodeString &first,
15650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             const UnicodeString &second,
15750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             UErrorCode &errorCode) const = 0;
15850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
15950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Appends the second string to the first string
16050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * (merging them at the boundary) and returns the first string.
16150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The result is normalized if both the strings were normalized.
16250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The first and second strings must be different objects.
16350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param first string, should be normalized
16450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param second string, should be normalized
16550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
16650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
16750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
16850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
16950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return first
17027f654740f2a26ad62a5c155af9199af9e69b889claireho     * @stable ICU 4.4
17150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
17250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UnicodeString &
17350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    append(UnicodeString &first,
17450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho           const UnicodeString &second,
17550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho           UErrorCode &errorCode) const = 0;
17650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
17750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
178b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Gets the decomposition mapping of c.
179b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Roughly equivalent to normalizing the string form of c
180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster. Unlike normalizing, this function
181b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * returns FALSE and does not write a string
182b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * if c does not have a decomposition mapping in this instance's data.
18327f654740f2a26ad62a5c155af9199af9e69b889claireho     * This function is independent of the mode of the Normalizer2.
18427f654740f2a26ad62a5c155af9199af9e69b889claireho     * @param c code point
18527f654740f2a26ad62a5c155af9199af9e69b889claireho     * @param decomposition String object which will be set to c's
18627f654740f2a26ad62a5c155af9199af9e69b889claireho     *                      decomposition mapping, if there is one.
18727f654740f2a26ad62a5c155af9199af9e69b889claireho     * @return TRUE if c has a decomposition, otherwise FALSE
18827f654740f2a26ad62a5c155af9199af9e69b889claireho     * @draft ICU 4.6
18927f654740f2a26ad62a5c155af9199af9e69b889claireho     */
19027f654740f2a26ad62a5c155af9199af9e69b889claireho    virtual UBool
19127f654740f2a26ad62a5c155af9199af9e69b889claireho    getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
19227f654740f2a26ad62a5c155af9199af9e69b889claireho
19327f654740f2a26ad62a5c155af9199af9e69b889claireho    /**
19450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Tests if the string is normalized.
19550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Internally, in cases where the quickCheck() method would return "maybe"
19650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * (which is only possible for the two COMPOSE modes) this method
19750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * resolves to "yes" or "no" to provide a definitive result,
19850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * at the cost of doing more work in those cases.
19950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param s input string
20050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
20150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
20250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
20350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
20450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return TRUE if s is normalized
20527f654740f2a26ad62a5c155af9199af9e69b889claireho     * @stable ICU 4.4
20650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
20750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UBool
20850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
20950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
21050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
21150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Tests if the string is normalized.
21250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * For the two COMPOSE modes, the result could be "maybe" in cases that
21350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * would take a little more work to resolve definitively.
21450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster
21550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * combination of quick check + normalization, to avoid
21650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * re-checking the "yes" prefix.
21750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param s input string
21850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
21950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
22050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
22150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
22250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return UNormalizationCheckResult
22327f654740f2a26ad62a5c155af9199af9e69b889claireho     * @stable ICU 4.4
22450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
22550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UNormalizationCheckResult
22650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
22750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
22850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
22950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Returns the end of the normalized substring of the input string.
23050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * In other words, with <code>end=spanQuickCheckYes(s, ec);</code>
23150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * the substring <code>UnicodeString(s, 0, end)</code>
23250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * will pass the quick check with a "yes" result.
23350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *
23450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The returned end index is usually one or more characters before the
23550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * "no" or "maybe" character: The end index is at a normalization boundary.
23650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * (See the class documentation for more about normalization boundaries.)
23750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *
23850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * When the goal is a normalized string and most input strings are expected
23950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * to be normalized already, then call this method,
24050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * and if it returns a prefix shorter than the input string,
24150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * copy that prefix and use normalizeSecondAndAppend() for the remainder.
24250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param s input string
24350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
24450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
24550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
24650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
24750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return "yes" span end index
24827f654740f2a26ad62a5c155af9199af9e69b889claireho     * @stable ICU 4.4
24950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
25050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual int32_t
25150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
25250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
25350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
25450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Tests if the character always has a normalization boundary before it,
25550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * regardless of context.
25650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * If true, then the character does not normalization-interact with
25750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * preceding characters.
25850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * In other words, a string containing this character can be normalized
25950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * by processing portions before this character and starting from this
26050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * character independently.
26150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * This is used for iterative normalization. See the class documentation for details.
26250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param c character to test
26350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return TRUE if c has a normalization boundary before it
26427f654740f2a26ad62a5c155af9199af9e69b889claireho     * @stable ICU 4.4
26550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
26650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
26750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
26850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
26950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Tests if the character always has a normalization boundary after it,
27050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * regardless of context.
27150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * If true, then the character does not normalization-interact with
27250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * following characters.
27350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * In other words, a string containing this character can be normalized
27450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * by processing portions up to this character and after this
27550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * character independently.
27650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * This is used for iterative normalization. See the class documentation for details.
27750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Note that this operation may be significantly slower than hasBoundaryBefore().
27850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param c character to test
27950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return TRUE if c has a normalization boundary after it
28027f654740f2a26ad62a5c155af9199af9e69b889claireho     * @stable ICU 4.4
28150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
28250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
28350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
28450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
28550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Tests if the character is normalization-inert.
28650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * If true, then the character does not change, nor normalization-interact with
28750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * preceding or following characters.
28850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * In other words, a string containing this character can be normalized
28950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * by processing portions before this character and after this
29050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * character independently.
29150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * This is used for iterative normalization. See the class documentation for details.
29250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Note that this operation may be significantly slower than hasBoundaryBefore().
29350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param c character to test
29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return TRUE if c is normalization-inert
29527f654740f2a26ad62a5c155af9199af9e69b889claireho     * @stable ICU 4.4
29650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
29750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UBool isInert(UChar32 c) const = 0;
29850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
29927f654740f2a26ad62a5c155af9199af9e69b889clairehoprivate:
30027f654740f2a26ad62a5c155af9199af9e69b889claireho    // No ICU "poor man's RTTI" for this class nor its subclasses.
30127f654740f2a26ad62a5c155af9199af9e69b889claireho    virtual UClassID getDynamicClassID() const;
30250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho};
30350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
30450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
30550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Normalization filtered by a UnicodeSet.
30650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Normalizes portions of the text contained in the filter set and leaves
30750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * portions not contained in the filter set unchanged.
30850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE).
30950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Not-in-the-filter text is treated as "is normalized" and "quick check yes".
31050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * This class implements all of (and only) the Normalizer2 API.
31150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * An instance of this class is unmodifiable/immutable but is constructed and
31250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * must be destructed by the owner.
31327f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4
31450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
31550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoclass U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
31650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehopublic:
31750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
31850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Constructs a filtered normalizer wrapping any Normalizer2 instance
31950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * and a filter set.
32050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Both are aliased and must not be modified or deleted while this object
32150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * is used.
32250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The filter set should be frozen; otherwise the performance will suffer greatly.
32350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param n2 wrapped Normalizer2 instance
32450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param filterSet UnicodeSet which determines the characters to be normalized
32527f654740f2a26ad62a5c155af9199af9e69b889claireho     * @stable ICU 4.4
32650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
32750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
32850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            norm2(n2), set(filterSet) {}
32950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
33050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
33150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Writes the normalized form of the source string to the destination string
33250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * (replacing its contents) and returns the destination string.
33350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The source and destination strings must be different objects.
33450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param src source string
33550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param dest destination string; its contents are replaced with normalized src
33650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
33750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
33850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
33950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
34050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return dest
34127f654740f2a26ad62a5c155af9199af9e69b889claireho     * @stable ICU 4.4
34250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
34350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UnicodeString &
34450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    normalize(const UnicodeString &src,
34550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho              UnicodeString &dest,
34650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho              UErrorCode &errorCode) const;
34750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
34850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Appends the normalized form of the second string to the first string
34950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * (merging them at the boundary) and returns the first string.
35050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The result is normalized if the first string was normalized.
35150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The first and second strings must be different objects.
35250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param first string, should be normalized
35350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param second string, will be normalized
35450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
35550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
35650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
35750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
35850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return first
35927f654740f2a26ad62a5c155af9199af9e69b889claireho     * @stable ICU 4.4
36050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
36150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UnicodeString &
36250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    normalizeSecondAndAppend(UnicodeString &first,
36350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             const UnicodeString &second,
36450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             UErrorCode &errorCode) const;
36550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
36650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Appends the second string to the first string
36750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * (merging them at the boundary) and returns the first string.
36850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The result is normalized if both the strings were normalized.
36950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The first and second strings must be different objects.
37050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param first string, should be normalized
37150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param second string, should be normalized
37250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
37350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
37450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
37550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
37650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return first
37727f654740f2a26ad62a5c155af9199af9e69b889claireho     * @stable ICU 4.4
37850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
37950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UnicodeString &
38050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    append(UnicodeString &first,
38150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho           const UnicodeString &second,
38250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho           UErrorCode &errorCode) const;
38350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
38450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
38527f654740f2a26ad62a5c155af9199af9e69b889claireho     * Gets the decomposition mapping of c. Equivalent to normalize(UnicodeString(c))
38627f654740f2a26ad62a5c155af9199af9e69b889claireho     * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster.
38727f654740f2a26ad62a5c155af9199af9e69b889claireho     * This function is independent of the mode of the Normalizer2.
38827f654740f2a26ad62a5c155af9199af9e69b889claireho     * @param c code point
38927f654740f2a26ad62a5c155af9199af9e69b889claireho     * @param decomposition String object which will be set to c's
39027f654740f2a26ad62a5c155af9199af9e69b889claireho     *                      decomposition mapping, if there is one.
39127f654740f2a26ad62a5c155af9199af9e69b889claireho     * @return TRUE if c has a decomposition, otherwise FALSE
39227f654740f2a26ad62a5c155af9199af9e69b889claireho     * @draft ICU 4.6
39327f654740f2a26ad62a5c155af9199af9e69b889claireho     */
39427f654740f2a26ad62a5c155af9199af9e69b889claireho    virtual UBool
39527f654740f2a26ad62a5c155af9199af9e69b889claireho    getDecomposition(UChar32 c, UnicodeString &decomposition) const;
39627f654740f2a26ad62a5c155af9199af9e69b889claireho
39727f654740f2a26ad62a5c155af9199af9e69b889claireho    /**
39850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Tests if the string is normalized.
39950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * For details see the Normalizer2 base class documentation.
40050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param s input string
40150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
40250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
40350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
40450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
40550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return TRUE if s is normalized
40627f654740f2a26ad62a5c155af9199af9e69b889claireho     * @stable ICU 4.4
40750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
40850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UBool
40950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const;
41050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
41150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Tests if the string is normalized.
41250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * For details see the Normalizer2 base class documentation.
41350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param s input string
41450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
41550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
41650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
41750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
41850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return UNormalizationCheckResult
41927f654740f2a26ad62a5c155af9199af9e69b889claireho     * @stable ICU 4.4
42050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
42150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UNormalizationCheckResult
42250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const;
42350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
42450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Returns the end of the normalized substring of the input string.
42550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * For details see the Normalizer2 base class documentation.
42650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param s input string
42750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param errorCode Standard ICU error code. Its input value must
42850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  pass the U_SUCCESS() test, or else the function returns
42950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  immediately. Check for U_FAILURE() on output or use with
43050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     *                  function chaining. (See User Guide for details.)
43150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return "yes" span end index
43227f654740f2a26ad62a5c155af9199af9e69b889claireho     * @stable ICU 4.4
43350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
43450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual int32_t
43550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const;
43650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
43750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
43850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Tests if the character always has a normalization boundary before it,
43950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * regardless of context.
44050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * For details see the Normalizer2 base class documentation.
44150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param c character to test
44250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return TRUE if c has a normalization boundary before it
44327f654740f2a26ad62a5c155af9199af9e69b889claireho     * @stable ICU 4.4
44450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
44550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UBool hasBoundaryBefore(UChar32 c) const;
44650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
44750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
44850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Tests if the character always has a normalization boundary after it,
44950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * regardless of context.
45050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * For details see the Normalizer2 base class documentation.
45150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param c character to test
45250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return TRUE if c has a normalization boundary after it
45327f654740f2a26ad62a5c155af9199af9e69b889claireho     * @stable ICU 4.4
45450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
45550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UBool hasBoundaryAfter(UChar32 c) const;
45650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
45750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
45850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Tests if the character is normalization-inert.
45950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * For details see the Normalizer2 base class documentation.
46050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @param c character to test
46150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * @return TRUE if c is normalization-inert
46227f654740f2a26ad62a5c155af9199af9e69b889claireho     * @stable ICU 4.4
46350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
46450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    virtual UBool isInert(UChar32 c) const;
46550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoprivate:
    // Internal overload of normalize() that takes an additional
    // USetSpanCondition argument; not part of the public API.
    // NOTE(review): presumably spanCondition selects whether the first run of
    // src is treated as inside or outside the filter set while src is split
    // into alternating spans -- the definition is not in this header; confirm
    // against the implementation file.
46650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString &
46750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    normalize(const UnicodeString &src,
46850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho              UnicodeString &dest,
46950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho              USetSpanCondition spanCondition,
47050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho              UErrorCode &errorCode) const;
47150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
    // Internal overload of normalizeSecondAndAppend() with an extra
    // doNormalize flag; not part of the public API.
    // NOTE(review): presumably the shared implementation behind the public
    // normalizeSecondAndAppend() (doNormalize=TRUE) and append()
    // (doNormalize=FALSE) -- confirm against the implementation file.
47250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString &
47350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    normalizeSecondAndAppend(UnicodeString &first,
47450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             const UnicodeString &second,
47550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             UBool doNormalize,
47650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                             UErrorCode &errorCode) const;
47750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
    // The wrapped normalizer and the filter set, bound by the constructor from
    // its n2 and filterSet arguments. Both are references to caller-owned
    // objects (aliased, never copied), so the caller must keep them alive and
    // unmodified for as long as this object is used.
    // Because these are reference members, the compiler cannot generate a
    // copy-assignment operator for this class.
47850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const Normalizer2 &norm2;
47950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const UnicodeSet &set;
48050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho};
48150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
48250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_END
48350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
48450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif  // !UCONFIG_NO_NORMALIZATION
48550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif  // __NORMALIZER2_H__
486