/*
*******************************************************************************
*
*   Copyright (C) 2009-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  normalizer2.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2009nov22
*   created by: Markus W. Scherer
*/
166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#ifndef __NORMALIZER2_H__
186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define __NORMALIZER2_H__
196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * \file
226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * \brief C++ API: New API for Unicode Normalization.
236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h"
266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_NORMALIZATION
286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uniset.h"
306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/unistr.h"
316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/unorm2.h"
326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_BEGIN
346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/**
 * Unicode normalization functionality for standard Unicode normalization or
 * for using custom mapping tables.
 * All instances of this class are unmodifiable/immutable.
 * Instances returned by getInstance() are singletons that must not be deleted by the caller.
 * The Normalizer2 class is not intended for public subclassing.
 *
 * The primary functions are to produce a normalized string and to detect whether
 * a string is already normalized.
 * The most commonly used normalization forms are those defined in
 * http://www.unicode.org/unicode/reports/tr15/
 * However, this API supports additional normalization forms for specialized purposes.
 * For example, NFKC_Casefold is provided via
 * getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode)
 * and can be used in implementations of UTS #46.
 *
 * Not only are the standard compose and decompose modes supplied,
 * but additional modes are provided as documented in the Mode enum.
 *
 * Some of the functions in this class identify normalization boundaries.
 * At a normalization boundary, the portions of the string
 * before it and starting from it do not interact and can be handled independently.
 *
 * The spanQuickCheckYes() function stops at a normalization boundary.
 * When the goal is a normalized string, then the text before the boundary
 * can be copied, and the remainder can be processed with normalizeSecondAndAppend().
 *
 * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether
 * a character is guaranteed to be at a normalization boundary,
 * regardless of context.
 * This is used for moving from one normalization boundary to the next
 * or preceding boundary, and for performing iterative normalization.
 *
 * Iterative normalization is useful when only a small portion of a
 * longer string needs to be processed.
 * For example, in ICU, iterative normalization is used by the NormalizationTransliterator
 * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart()
 * (to process only the substring for which sort key bytes are computed).
 *
 * The set of normalization boundaries returned by these functions may not be
 * complete: There may be more boundaries that could be returned.
 * Different functions may return different boundaries.
 * @stable ICU 4.4
 */
786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgclass U_COMMON_API Normalizer2 : public UObject {
796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgpublic:
806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Destructor.
826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 4.4
836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ~Normalizer2();
856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Returns a Normalizer2 instance for Unicode NFC normalization.
886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Same as getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode).
896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Returns an unmodifiable singleton instance. Do not delete it.
906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param errorCode Standard ICU error code. Its input value must
916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  pass the U_SUCCESS() test, or else the function returns
926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  immediately. Check for U_FAILURE() on output or use with
936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  function chaining. (See User Guide for details.)
946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return the requested Normalizer2, if successful
956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 49
966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    static const Normalizer2 *
986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    getNFCInstance(UErrorCode &errorCode);
996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Returns a Normalizer2 instance for Unicode NFD normalization.
1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Same as getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode).
1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Returns an unmodifiable singleton instance. Do not delete it.
1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param errorCode Standard ICU error code. Its input value must
1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  pass the U_SUCCESS() test, or else the function returns
1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  immediately. Check for U_FAILURE() on output or use with
1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  function chaining. (See User Guide for details.)
1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return the requested Normalizer2, if successful
1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 49
1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    static const Normalizer2 *
1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    getNFDInstance(UErrorCode &errorCode);
1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Returns a Normalizer2 instance for Unicode NFKC normalization.
1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Same as getInstance(NULL, "nfkc", UNORM2_COMPOSE, errorCode).
1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Returns an unmodifiable singleton instance. Do not delete it.
1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param errorCode Standard ICU error code. Its input value must
1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  pass the U_SUCCESS() test, or else the function returns
1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  immediately. Check for U_FAILURE() on output or use with
1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  function chaining. (See User Guide for details.)
1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return the requested Normalizer2, if successful
1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 49
1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    static const Normalizer2 *
1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    getNFKCInstance(UErrorCode &errorCode);
1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Returns a Normalizer2 instance for Unicode NFKD normalization.
1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Same as getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, errorCode).
1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Returns an unmodifiable singleton instance. Do not delete it.
1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param errorCode Standard ICU error code. Its input value must
1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  pass the U_SUCCESS() test, or else the function returns
1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  immediately. Check for U_FAILURE() on output or use with
1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  function chaining. (See User Guide for details.)
1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return the requested Normalizer2, if successful
1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 49
1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    static const Normalizer2 *
1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    getNFKDInstance(UErrorCode &errorCode);
1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization.
1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Same as getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode).
1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Returns an unmodifiable singleton instance. Do not delete it.
1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param errorCode Standard ICU error code. Its input value must
1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  pass the U_SUCCESS() test, or else the function returns
1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  immediately. Check for U_FAILURE() on output or use with
1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  function chaining. (See User Guide for details.)
1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return the requested Normalizer2, if successful
1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 49
1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    static const Normalizer2 *
1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    getNFKCCasefoldInstance(UErrorCode &errorCode);
1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Returns a Normalizer2 instance which uses the specified data file
1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * and which composes or decomposes text according to the specified mode.
1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Returns an unmodifiable singleton instance. Do not delete it.
1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *
1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Use packageName=NULL for data files that are part of ICU's own data.
1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *
1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param packageName NULL for ICU built-in data, otherwise application data package name
1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param mode normalization mode (compose or decompose etc.)
1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param errorCode Standard ICU error code. Its input value must
1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  pass the U_SUCCESS() test, or else the function returns
1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  immediately. Check for U_FAILURE() on output or use with
1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  function chaining. (See User Guide for details.)
1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return the requested Normalizer2, if successful
1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 4.4
1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    static const Normalizer2 *
1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    getInstance(const char *packageName,
1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                const char *name,
1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UNormalization2Mode mode,
1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                UErrorCode &errorCode);
1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Returns the normalized form of the source string.
1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param src source string
1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param errorCode Standard ICU error code. Its input value must
1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  pass the U_SUCCESS() test, or else the function returns
1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  immediately. Check for U_FAILURE() on output or use with
1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  function chaining. (See User Guide for details.)
1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return normalized src
1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 4.4
1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString
1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    normalize(const UnicodeString &src, UErrorCode &errorCode) const {
1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString result;
1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        normalize(src, result, errorCode);
1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return result;
1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Writes the normalized form of the source string to the destination string
2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * (replacing its contents) and returns the destination string.
2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * The source and destination strings must be different objects.
2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param src source string
2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param dest destination string; its contents is replaced with normalized src
2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param errorCode Standard ICU error code. Its input value must
2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  pass the U_SUCCESS() test, or else the function returns
2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  immediately. Check for U_FAILURE() on output or use with
2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  function chaining. (See User Guide for details.)
2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return dest
2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 4.4
2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual UnicodeString &
2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    normalize(const UnicodeString &src,
2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org              UnicodeString &dest,
2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org              UErrorCode &errorCode) const = 0;
2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Appends the normalized form of the second string to the first string
2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * (merging them at the boundary) and returns the first string.
2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * The result is normalized if the first string was normalized.
2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * The first and second strings must be different objects.
2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param first string, should be normalized
2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param second string, will be normalized
2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param errorCode Standard ICU error code. Its input value must
2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  pass the U_SUCCESS() test, or else the function returns
2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  immediately. Check for U_FAILURE() on output or use with
2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  function chaining. (See User Guide for details.)
2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return first
2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 4.4
2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual UnicodeString &
2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    normalizeSecondAndAppend(UnicodeString &first,
2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                             const UnicodeString &second,
2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                             UErrorCode &errorCode) const = 0;
2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Appends the second string to the first string
2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * (merging them at the boundary) and returns the first string.
2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * The result is normalized if both the strings were normalized.
2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * The first and second strings must be different objects.
2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param first string, should be normalized
2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param second string, should be normalized
2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param errorCode Standard ICU error code. Its input value must
2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  pass the U_SUCCESS() test, or else the function returns
2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  immediately. Check for U_FAILURE() on output or use with
2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  function chaining. (See User Guide for details.)
2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return first
2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 4.4
2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual UnicodeString &
2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    append(UnicodeString &first,
2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org           const UnicodeString &second,
2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org           UErrorCode &errorCode) const = 0;
2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Gets the decomposition mapping of c.
2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Roughly equivalent to normalizing the String form of c
2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster, and except that this function
2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * returns FALSE and does not write a string
2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * if c does not have a decomposition mapping in this instance's data.
2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * This function is independent of the mode of the Normalizer2.
2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param c code point
2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param decomposition String object which will be set to c's
2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                      decomposition mapping, if there is one.
2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return TRUE if c has a decomposition, otherwise FALSE
2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 4.6
2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual UBool
2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Gets the raw decomposition mapping of c.
2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *
2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * This is similar to the getDecomposition() method but returns the
2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * raw decomposition mapping as specified in UnicodeData.txt or
2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * (for custom data) in the mapping files processed by the gennorm2 tool.
2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * By contrast, getDecomposition() returns the processed,
2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * recursively-decomposed version of this mapping.
2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *
2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * When used on a standard NFKC Normalizer2 instance,
2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *
2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * When used on a standard NFC Normalizer2 instance,
2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * in this case, the result contains either one or two code points (=1..4 UChars).
2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *
2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * This function is independent of the mode of the Normalizer2.
2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * The default implementation returns FALSE.
2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param c code point
2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param decomposition String object which will be set to c's
2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                      raw decomposition mapping, if there is one.
2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return TRUE if c has a decomposition, otherwise FALSE
2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 49
2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual UBool
2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Performs pairwise composition of a and b and returns the composite if there is one.
2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *
2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Returns a composite code point c only if c has a two-way mapping to a+b.
3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * In standard Unicode normalization, this means that
3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * c has a canonical decomposition to a+b
3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * and c does not have the Full_Composition_Exclusion property.
3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *
3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * This function is independent of the mode of the Normalizer2.
3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * The default implementation returns a negative value.
     * Only "a negative value" is promised when there is no composite, so callers
     * should test the result with < 0 rather than compare it against one sentinel.
3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param a A (normalization starter) code point.
3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param b Another code point.
3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return The non-negative composite code point if there is one; otherwise a negative value.
3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 49
3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual UChar32
3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    composePair(UChar32 a, UChar32 b) const;
3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Gets the combining class of c.
3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * The default implementation returns 0
3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * but all standard implementations return the Unicode Canonical_Combining_Class value.
     * Not pure virtual: a subclass may override it, otherwise the default applies.
3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param c code point
3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return c's combining class as an unsigned 8-bit value (0 from the default implementation)
3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 49
3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual uint8_t
3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    getCombiningClass(UChar32 c) const;
3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Tests if the string is normalized.
3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Internally, in cases where the quickCheck() method would return "maybe"
3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * (which is only possible for the two COMPOSE modes) this method
3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * resolves to "yes" or "no" to provide a definitive result,
3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * at the cost of doing more work in those cases.
     * Pure virtual: every concrete subclass must supply the implementation.
3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param s input string
3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param errorCode Standard ICU error code. Its input value must
3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  pass the U_SUCCESS() test, or else the function returns
3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  immediately. Check for U_FAILURE() on output or use with
3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  function chaining. (See User Guide for details.)
3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return TRUE if s is normalized, otherwise FALSE (never an undecided "maybe")
3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 4.4
3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
3396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual UBool
3406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
3416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
3436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Quick-checks whether the string is normalized; the answer may be inconclusive.
3446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * For the two COMPOSE modes, the result could be "maybe" in cases that
3456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * would take a little more work to resolve definitively.
     * Call isNormalized() instead when a definitive TRUE/FALSE answer is required.
3466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster
3476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * combination of quick check + normalization, to avoid
3486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * re-checking the "yes" prefix.
     * Pure virtual: every concrete subclass must supply the implementation.
3496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param s input string
3506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param errorCode Standard ICU error code. Its input value must
3516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  pass the U_SUCCESS() test, or else the function returns
3526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  immediately. Check for U_FAILURE() on output or use with
3536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  function chaining. (See User Guide for details.)
3546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return UNormalizationCheckResult: "yes", "no", or (COMPOSE modes only) "maybe"
3556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 4.4
3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual UNormalizationCheckResult
3586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Returns the end of the normalized substring of the input string.
3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * In other words, with <code>end=spanQuickCheckYes(s, ec);</code>
3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * the substring <code>UnicodeString(s, 0, end)</code>
3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * will pass the quick check with a "yes" result.
3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *
3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * The returned end index is usually one or more characters before the
3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * "no" or "maybe" character: The end index is at a normalization boundary.
3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * (See the class documentation for more about normalization boundaries.)
3696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *
3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * When the goal is a normalized string and most input strings are expected
3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * to be normalized already, then call this method,
3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * and if it returns a prefix shorter than the input string,
3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * copy that prefix and use normalizeSecondAndAppend() for the remainder.
     * Pure virtual: every concrete subclass must supply the implementation.
3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param s input string
3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param errorCode Standard ICU error code. Its input value must
3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  pass the U_SUCCESS() test, or else the function returns
3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  immediately. Check for U_FAILURE() on output or use with
3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  function chaining. (See User Guide for details.)
3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return "yes" span end index: the end of the prefix of s that passes the quick check with "yes"
3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 4.4
3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual int32_t
3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Tests if the character always has a normalization boundary before it,
3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * regardless of context.
3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * If true, then the character does not normalization-interact with
3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * preceding characters.
3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * In other words, a string containing this character can be normalized
3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * by processing portions before this character and starting from this
3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * character independently.
3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * This is used for iterative normalization. See the class documentation for details.
     * Pure virtual: every concrete subclass must supply the implementation.
3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param c character to test
3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return TRUE if c always has a normalization boundary before it; otherwise FALSE
3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 4.4
3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Tests if the character always has a normalization boundary after it,
4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * regardless of context.
4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * If true, then the character does not normalization-interact with
4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * following characters.
4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * In other words, a string containing this character can be normalized
4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * by processing portions up to this character and after this
4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * character independently.
4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * This is used for iterative normalization. See the class documentation for details.
4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Note that this operation may be significantly slower than hasBoundaryBefore().
     * Pure virtual: every concrete subclass must supply the implementation.
4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param c character to test
4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return TRUE if c always has a normalization boundary after it; otherwise FALSE
4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 4.4
4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
4146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
4156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
4176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Tests if the character is normalization-inert.
4186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * If true, then the character does not change, nor normalization-interact with
4196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * preceding or following characters.
4206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * In other words, a string containing this character can be normalized
4216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * by processing portions before this character and after this
4226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * character independently.
4236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * This is used for iterative normalization. See the class documentation for details.
4246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Note that this operation may be significantly slower than hasBoundaryBefore().
     * Pure virtual: every concrete subclass must supply the implementation.
4256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param c character to test
4266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return TRUE if c is normalization-inert; otherwise FALSE
4276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 4.4
4286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
4296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual UBool isInert(UChar32 c) const = 0;
4306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org};
4316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
4336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Normalization filtered by a UnicodeSet.
4346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Normalizes portions of the text contained in the filter set and leaves
4356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * portions not contained in the filter set unchanged.
4366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE).
4376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Not-in-the-filter text is treated as "is normalized" and "quick check yes".
4386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This class implements all of (and only) the Normalizer2 API.
4396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * An instance of this class is unmodifiable/immutable but is constructed and
4406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * must be destructed by the owner.
4416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @stable ICU 4.4
4426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
4436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgclass U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
4446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgpublic:
4456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
4466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Constructs a filtered normalizer wrapping any Normalizer2 instance
4476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * and a filter set.
4486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Both are aliased and must not be modified or deleted while this object
4496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * is used.
4506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * The filter set should be frozen; otherwise the performance will suffer greatly.
     * The constructor body is empty: it only initializes the members norm2 and set
     * from the two arguments (presumably declared later in this class).
4516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param n2 wrapped Normalizer2 instance
4526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param filterSet UnicodeSet which determines the characters to be normalized
4536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 4.4
4546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
4556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
4566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            norm2(n2), set(filterSet) {}
4576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
4596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Destructor.
     * Only declared here; the definition lives outside the class body.
     * NOTE(review): the wrapped Normalizer2 and the filter set are documented as
     * aliased, so presumably neither is deleted here - confirm in the implementation.
4606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 4.4
4616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
4626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ~FilteredNormalizer2();
4636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
4656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Writes the normalized form of the source string to the destination string
4666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * (replacing its contents) and returns the destination string.
4676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * The source and destination strings must be different objects.
     * As described for this class, only the portions of src contained in the
     * filter set are normalized; portions outside it are left unchanged.
4686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param src source string
4696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param dest destination string; its contents are replaced with normalized src
4706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param errorCode Standard ICU error code. Its input value must
4716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  pass the U_SUCCESS() test, or else the function returns
4726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  immediately. Check for U_FAILURE() on output or use with
4736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  function chaining. (See User Guide for details.)
4746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return dest
4756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 4.4
4766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
4776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual UnicodeString &
4786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    normalize(const UnicodeString &src,
4796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org              UnicodeString &dest,
4806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org              UErrorCode &errorCode) const;
4816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
4826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Appends the normalized form of the second string to the first string
4836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * (merging them at the boundary) and returns the first string.
4846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * The result is normalized if the first string was normalized.
4856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * The first and second strings must be different objects.
     * "Normalized" is meant in this class's filtered sense: text outside the
     * filter set is left unchanged and treated as already normalized.
4866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param first string, should be normalized
4876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param second string, will be normalized
4886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param errorCode Standard ICU error code. Its input value must
4896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  pass the U_SUCCESS() test, or else the function returns
4906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  immediately. Check for U_FAILURE() on output or use with
4916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  function chaining. (See User Guide for details.)
4926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return first
4936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 4.4
4946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
4956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual UnicodeString &
4966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    normalizeSecondAndAppend(UnicodeString &first,
4976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                             const UnicodeString &second,
4986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                             UErrorCode &errorCode) const;
4996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
5006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Appends the second string to the first string
5016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * (merging them at the boundary) and returns the first string.
5026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * The result is normalized if both the strings were normalized.
5036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * The first and second strings must be different objects.
     * "Normalized" is meant in this class's filtered sense: text outside the
     * filter set is treated as already normalized.
5046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param first string, should be normalized
5056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param second string, should be normalized
5066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param errorCode Standard ICU error code. Its input value must
5076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  pass the U_SUCCESS() test, or else the function returns
5086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  immediately. Check for U_FAILURE() on output or use with
5096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  function chaining. (See User Guide for details.)
5106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return first
5116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 4.4
5126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
5136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual UnicodeString &
5146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    append(UnicodeString &first,
5156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org           const UnicodeString &second,
5166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org           UErrorCode &errorCode) const;
5176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
5196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Gets the decomposition mapping of c.
5206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * For details see the base class documentation.
5216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *
5226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * This function is independent of the mode of the Normalizer2.
     * NOTE(review): how a code point outside the filter set is answered is not
     * visible in this declaration - confirm in the implementation.
5236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param c code point
5246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param decomposition String object which will be set to c's
5256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                      decomposition mapping, if there is one.
5266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return TRUE if c has a decomposition, otherwise FALSE
5276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 4.6
5286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
5296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual UBool
5306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    getDecomposition(UChar32 c, UnicodeString &decomposition) const;
5316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
5336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Gets the raw decomposition mapping of c.
5346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * For details see the base class documentation.
5356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *
5366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * This function is independent of the mode of the Normalizer2.
     * Overrides Normalizer2::getRawDecomposition(), whose default returns FALSE.
     * NOTE(review): how a code point outside the filter set is answered is not
     * visible in this declaration - confirm in the implementation.
5376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param c code point
5386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param decomposition String object which will be set to c's
5396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                      raw decomposition mapping, if there is one.
5406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return TRUE if c has a decomposition, otherwise FALSE
5416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 49
5426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
5436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual UBool
5446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
5456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
5476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Performs pairwise composition of a and b and returns the composite if there is one.
5486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * For details see the base class documentation.
5496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *
5506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * This function is independent of the mode of the Normalizer2.
     * Overrides Normalizer2::composePair(), whose default returns a negative value.
     * NOTE(review): how code points outside the filter set are answered is not
     * visible in this declaration - confirm in the implementation.
5516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param a A (normalization starter) code point.
5526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param b Another code point.
5536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return The non-negative composite code point if there is one; otherwise a negative value.
5546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 49
5556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
5566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual UChar32
5576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    composePair(UChar32 a, UChar32 b) const;
5586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
5606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Gets the combining class of c.
5616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * The default (Normalizer2 base class) implementation returns 0
5626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * but all standard implementations return the Unicode Canonical_Combining_Class value.
     * NOTE(review): how a code point outside the filter set is answered is not
     * visible in this declaration - confirm in the implementation.
5636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param c code point
5646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return c's combining class
5656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 49
5666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
5676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual uint8_t
5686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    getCombiningClass(UChar32 c) const;
5696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
5716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Tests if the string is normalized.
5726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * For details see the Normalizer2 base class documentation.
     * Implements the pure virtual Normalizer2::isNormalized(); as described for
     * this class, text not in the filter set is treated as "is normalized".
5736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param s input string
5746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param errorCode Standard ICU error code. Its input value must
5756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  pass the U_SUCCESS() test, or else the function returns
5766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  immediately. Check for U_FAILURE() on output or use with
5776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  function chaining. (See User Guide for details.)
5786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return TRUE if s is normalized
5796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 4.4
5806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
5816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual UBool
5826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const;
5836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
5846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Quick-checks whether the string is normalized; the answer may be inconclusive.
5856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * For details see the Normalizer2 base class documentation.
     * Implements the pure virtual Normalizer2::quickCheck(); as described for
     * this class, text not in the filter set is treated as "quick check yes".
5866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param s input string
5876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param errorCode Standard ICU error code. Its input value must
5886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  pass the U_SUCCESS() test, or else the function returns
5896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  immediately. Check for U_FAILURE() on output or use with
5906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  function chaining. (See User Guide for details.)
5916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return UNormalizationCheckResult
5926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 4.4
5936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
5946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual UNormalizationCheckResult
5956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const;
5966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
5976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Returns the end of the normalized substring of the input string.
5986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * For details see the Normalizer2 base class documentation.
     * Implements the pure virtual Normalizer2::spanQuickCheckYes(); as described
     * for this class, text not in the filter set is treated as "quick check yes".
5996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param s input string
6006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param errorCode Standard ICU error code. Its input value must
6016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  pass the U_SUCCESS() test, or else the function returns
6026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  immediately. Check for U_FAILURE() on output or use with
6036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     *                  function chaining. (See User Guide for details.)
6046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return "yes" span end index
6056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 4.4
6066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
6076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual int32_t
6086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const;
6096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
6116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Tests if the character always has a normalization boundary before it,
6126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * regardless of context.
6136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * For details see the Normalizer2 base class documentation.
     * Implements the pure virtual Normalizer2::hasBoundaryBefore().
     * NOTE(review): how a character outside the filter set is answered is not
     * visible in this declaration - confirm in the implementation.
6146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param c character to test
6156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return TRUE if c has a normalization boundary before it
6166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 4.4
6176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
6186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual UBool hasBoundaryBefore(UChar32 c) const;
6196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
6216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Tests if the character always has a normalization boundary after it,
6226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * regardless of context.
6236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * For details see the Normalizer2 base class documentation.
6246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param c character to test
6256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return TRUE if c always has a normalization boundary after it, regardless of context
6266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 4.4
6276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
6286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual UBool hasBoundaryAfter(UChar32 c) const;
6296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /**
6316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * Tests if the character is normalization-inert.
6326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * For details see the Normalizer2 base class documentation.
6336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @param c character to test
6346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @return TRUE if c is normalization-inert (as defined in the Normalizer2 base class documentation)
6356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     * @stable ICU 4.4
6366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org     */
6376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    virtual UBool isInert(UChar32 c) const;
6386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgprivate:
6396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString &  // Private, non-virtual helper. NOTE(review): presumably returns dest for chaining -- confirm in the implementation.
6406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    normalize(const UnicodeString &src,  // input string, taken by const reference
6416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org              UnicodeString &dest,  // output string, taken by non-const reference
6426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org              USetSpanCondition spanCondition,  // NOTE(review): presumably the span condition applied with the UnicodeSet member -- semantics not visible in this header
6436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org              UErrorCode &errorCode) const;  // ICU error code, passed by reference (in/out)
6446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString &  // Private, non-virtual helper. NOTE(review): presumably returns first for chaining -- confirm in the implementation.
6466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    normalizeSecondAndAppend(UnicodeString &first,  // string taken by non-const reference, so it may be modified
6476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                             const UnicodeString &second,  // second string, taken by const reference
6486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                             UBool doNormalize,  // NOTE(review): presumably selects normalizing second before appending vs. plain append -- confirm
6496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                             UErrorCode &errorCode) const;  // ICU error code, passed by reference (in/out)
6506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const Normalizer2 &norm2;  // const reference to a Normalizer2. NOTE(review): presumably the normalizer this class delegates to -- confirm.
6526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UnicodeSet &set;  // const reference to a UnicodeSet. NOTE(review): presumably the filter set -- confirm.
6536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org};
6546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_END
6566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif  // !UCONFIG_NO_NORMALIZATION
6586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif  // __NORMALIZER2_H__
659