150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/*
250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*******************************************************************************
350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*
48393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius*   Copyright (C) 2009-2013, International Business Machines
550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   Corporation and others.  All Rights Reserved.
650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*
750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*******************************************************************************
850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   file name:  unorm2.h
950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   encoding:   US-ASCII
1050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   tab size:   8 (not used)
1150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   indentation:4
1250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*
1350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   created on: 2009dec15
1450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   created by: Markus W. Scherer
1550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*/
1650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
1750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#ifndef __UNORM2_H__
1850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#define __UNORM2_H__
1950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
2050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
2150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * \file
2250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * \brief C API: New API for Unicode Normalization.
2350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
2450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Unicode normalization functionality for standard Unicode normalization or
2550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * for using custom mapping tables.
2650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * All instances of UNormalizer2 are unmodifiable/immutable.
2750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Instances returned by unorm2_getInstance() are singletons that must not be deleted by the caller.
2850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For more details see the Normalizer2 C++ class.
2950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
3050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
3150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/utypes.h"
3250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/localpointer.h"
3350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/uset.h"
3450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
3550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
3650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Constants for normalization modes.
3750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details about standard Unicode normalization forms
3850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and about the algorithms which are also used with custom mapping tables
3950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * see http://www.unicode.org/reports/tr15/
4027f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4
4150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
4250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehotypedef enum {
4350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
4450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Decomposition followed by composition.
4550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Same as standard NFC when using an "nfc" instance.
4650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Same as standard NFKC when using an "nfkc" instance.
4750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * For details about standard Unicode normalization forms
4850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * see http://www.unicode.org/reports/tr15/
4927f654740f2a26ad62a5c155af9199af9e69b889claireho     * @stable ICU 4.4
5050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
5150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UNORM2_COMPOSE,
5250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
5350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Map, and reorder canonically.
5450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Same as standard NFD when using an "nfc" instance.
5550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Same as standard NFKD when using an "nfkc" instance.
5650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * For details about standard Unicode normalization forms
5750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * see http://www.unicode.org/reports/tr15/
5827f654740f2a26ad62a5c155af9199af9e69b889claireho     * @stable ICU 4.4
5950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
6050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UNORM2_DECOMPOSE,
6150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
6250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * "Fast C or D" form.
6350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * If a string is in this form, then further decomposition <i>without reordering</i>
6450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * would yield the same form as DECOMPOSE.
6550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Text in "Fast C or D" form can be processed efficiently with data tables
6650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * that are "canonically closed", that is, that provide equivalent data for
6750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * equivalent text, without having to be fully normalized.
6850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Not a standard Unicode normalization form.
6950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Not a unique form: Different FCD strings can be canonically equivalent.
7050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * For details see http://www.unicode.org/notes/tn5/#FCD
7127f654740f2a26ad62a5c155af9199af9e69b889claireho     * @stable ICU 4.4
7250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
7350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UNORM2_FCD,
7450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /**
7550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Compose only contiguously.
7650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Also known as "FCC" or "Fast C Contiguous".
7750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The result will often but not always be in NFC.
7850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * The result will conform to FCD which is useful for processing.
7950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * Not a standard Unicode normalization form.
8050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     * For details see http://www.unicode.org/notes/tn5/#FCC
8127f654740f2a26ad62a5c155af9199af9e69b889claireho     * @stable ICU 4.4
8250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho     */
8350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UNORM2_COMPOSE_CONTIGUOUS
8450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} UNormalization2Mode;
8550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
8650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
8750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Result values for normalization quick check functions.
8850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms
 * Declared before the UCONFIG_NO_NORMALIZATION conditional in this header,
 * so the type itself does not depend on that configuration setting.
8950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 2.0
9050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
9150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehotypedef enum UNormalizationCheckResult {
9227f654740f2a26ad62a5c155af9199af9e69b889claireho  /**
9350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho   * The input string is not in the normalization form.
9450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho   * @stable ICU 2.0
9550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho   */
9650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho  UNORM_NO,
9727f654740f2a26ad62a5c155af9199af9e69b889claireho  /**
9850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho   * The input string is in the normalization form.
9950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho   * @stable ICU 2.0
10050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho   */
10150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho  UNORM_YES,
10227f654740f2a26ad62a5c155af9199af9e69b889claireho  /**
10350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho   * The input string may or may not be in the normalization form.
10450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho   * This value is only returned for composition forms like NFC and FCC,
10550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho   * when a backward-combining character is found for which the surrounding text
10650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho   * would have to be analyzed further.
10750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho   * @stable ICU 2.0
10850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho   */
10950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho  UNORM_MAYBE
11050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} UNormalizationCheckResult;
11150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
11250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
11350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Opaque C service object type for the new normalization API.
 * Only forward-declared at this point, so C code refers to instances
 * through UNormalizer2 pointers.
11427f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4
11550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
11650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostruct UNormalizer2;
11727f654740f2a26ad62a5c155af9199af9e69b889clairehotypedef struct UNormalizer2 UNormalizer2;  /**< C typedef for struct UNormalizer2. @stable ICU 4.4 */
11850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
11950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_NORMALIZATION
12050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
121103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius/**
122103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Returns a UNormalizer2 instance for Unicode NFC normalization.
123103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode).
124103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Returns an unmodifiable singleton instance. Do not delete it.
12554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * @param pErrorCode Standard ICU error code. Its input value must
126103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *                  pass the U_SUCCESS() test, or else the function returns
127103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *                  immediately. Check for U_FAILURE() on output or use with
128103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *                  function chaining. (See User Guide for details.)
129103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @return the requested UNormalizer2, if successful
1308393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @stable ICU 49
131103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius */
1328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_STABLE const UNormalizer2 * U_EXPORT2
133103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusunorm2_getNFCInstance(UErrorCode *pErrorCode);
134103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
135103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius/**
136103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Returns a UNormalizer2 instance for Unicode NFD normalization.
137103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode).
138103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Returns an unmodifiable singleton instance. Do not delete it.
13954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * @param pErrorCode Standard ICU error code. Its input value must
140103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *                  pass the U_SUCCESS() test, or else the function returns
141103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *                  immediately. Check for U_FAILURE() on output or use with
142103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *                  function chaining. (See User Guide for details.)
143103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @return the requested UNormalizer2, if successful
1448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @stable ICU 49
145103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius */
1468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_STABLE const UNormalizer2 * U_EXPORT2
147103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusunorm2_getNFDInstance(UErrorCode *pErrorCode);
148103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
149103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius/**
150103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Returns a UNormalizer2 instance for Unicode NFKC normalization.
151103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode).
152103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Returns an unmodifiable singleton instance. Do not delete it.
15354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * @param pErrorCode Standard ICU error code. Its input value must
154103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *                  pass the U_SUCCESS() test, or else the function returns
155103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *                  immediately. Check for U_FAILURE() on output or use with
156103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *                  function chaining. (See User Guide for details.)
157103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @return the requested UNormalizer2, if successful
1588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @stable ICU 49
159103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius */
1608393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_STABLE const UNormalizer2 * U_EXPORT2
161103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusunorm2_getNFKCInstance(UErrorCode *pErrorCode);
162103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
163103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius/**
164103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Returns a UNormalizer2 instance for Unicode NFKD normalization.
165103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode).
166103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Returns an unmodifiable singleton instance. Do not delete it.
16754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * @param pErrorCode Standard ICU error code. Its input value must
168103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *                  pass the U_SUCCESS() test, or else the function returns
169103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *                  immediately. Check for U_FAILURE() on output or use with
170103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *                  function chaining. (See User Guide for details.)
171103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @return the requested UNormalizer2, if successful
1728393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @stable ICU 49
173103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius */
1748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_STABLE const UNormalizer2 * U_EXPORT2
175103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusunorm2_getNFKDInstance(UErrorCode *pErrorCode);
176103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
177103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius/**
178103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization.
179103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode).
180103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Returns an unmodifiable singleton instance. Do not delete it.
18154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * @param pErrorCode Standard ICU error code. Its input value must
182103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *                  pass the U_SUCCESS() test, or else the function returns
183103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *                  immediately. Check for U_FAILURE() on output or use with
184103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *                  function chaining. (See User Guide for details.)
185103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @return the requested UNormalizer2, if successful
1868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @stable ICU 49
187103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius */
1888393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_STABLE const UNormalizer2 * U_EXPORT2
189103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusunorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode);
190103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
19150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
19250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Returns a UNormalizer2 instance which uses the specified data file
19350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
19450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and which composes or decomposes text according to the specified mode.
19550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Returns an unmodifiable singleton instance. Do not delete it.
19650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
19750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Use packageName=NULL for data files that are part of ICU's own data.
19850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
19950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
20050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
 * These standard combinations are also available through unorm2_getNFCInstance(),
 * unorm2_getNFDInstance(), unorm2_getNFKCInstance(), unorm2_getNFKDInstance()
 * and unorm2_getNFKCCasefoldInstance().
20150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
20250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param packageName NULL for ICU built-in data, otherwise application data package name
20350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
20450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param mode normalization mode (compose or decompose etc.)
20550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param pErrorCode Standard ICU error code. Its input value must
20650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                  pass the U_SUCCESS() test, or else the function returns
20750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                  immediately. Check for U_FAILURE() on output or use with
20850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                  function chaining. (See User Guide for details.)
20950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return the requested UNormalizer2, if successful
21027f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4
21150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
21227f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE const UNormalizer2 * U_EXPORT2
21350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_getInstance(const char *packageName,
21450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                   const char *name,
21550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                   UNormalization2Mode mode,
21650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                   UErrorCode *pErrorCode);
21750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
21850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
21950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Constructs a filtered normalizer wrapping any UNormalizer2 instance
22050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and a filter set.
22150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Both are aliased and must not be modified or deleted while this object
22250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * is used.
22350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The filter set should be frozen; otherwise the performance will suffer greatly.
22427f654740f2a26ad62a5c155af9199af9e69b889claireho * @param norm2 wrapped UNormalizer2 instance
22550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param filterSet USet which determines the characters to be normalized
22650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param pErrorCode Standard ICU error code. Its input value must
22750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                   pass the U_SUCCESS() test, or else the function returns
22850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                   immediately. Check for U_FAILURE() on output or use with
22950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                   function chaining. (See User Guide for details.)
23050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return the requested UNormalizer2, if successful; close it with unorm2_close() when done
23127f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4
23250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
23327f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE UNormalizer2 * U_EXPORT2
23450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode);
23550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
23650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
23750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Closes a UNormalizer2 instance from unorm2_openFiltered().
23850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Do not close instances from unorm2_getInstance() or from the
 * unorm2_getNFCInstance() family of functions; those are singletons!
23950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param norm2 UNormalizer2 instance to be closed
24027f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4
24150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
24227f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE void U_EXPORT2
24350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_close(UNormalizer2 *norm2);
24450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
24550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if U_SHOW_CPLUSPLUS_API
24650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
24750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_BEGIN
24850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
24950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
25050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * \class LocalUNormalizer2Pointer
25150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * "Smart pointer" class, closes a UNormalizer2 via unorm2_close().
 * Use it only for instances created by unorm2_openFiltered(); the singleton
 * instances from unorm2_getInstance() must not be closed.
25250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For most methods see the LocalPointerBase base class.
25350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
25450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @see LocalPointerBase
25550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @see LocalPointer
25627f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4
25750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
25850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_DEFINE_LOCAL_OPEN_POINTER(LocalUNormalizer2Pointer, UNormalizer2, unorm2_close);
25950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
26050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_END
26150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
26250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
26350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
26450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
26550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Writes the normalized form of the source string to the destination string
26650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (replacing its contents) and returns the length of the destination string.
26750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The source and destination strings must be different buffers.
26850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param norm2 UNormalizer2 instance
26950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param src source string
27050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param length length of the source string, or -1 if NUL-terminated
27150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param dest destination string; its contents are replaced with normalized src
27250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param capacity number of UChars that can be written to dest
27350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param pErrorCode Standard ICU error code. Its input value must
27450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                   pass the U_SUCCESS() test, or else the function returns
27550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                   immediately. Check for U_FAILURE() on output or use with
27650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                   function chaining. (See User Guide for details.)
27750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return the length of the destination string
27827f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4
27950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
28027f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE int32_t U_EXPORT2
28150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_normalize(const UNormalizer2 *norm2,
28250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 const UChar *src, int32_t length,
28350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 UChar *dest, int32_t capacity,
28450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                 UErrorCode *pErrorCode);
28550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
28650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Appends the normalized form of the second string to the first string
28750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (merging them at the boundary) and returns the length of the first string.
28850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The result is normalized if the first string was normalized.
28950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The first and second strings must be different buffers.
29050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param norm2 UNormalizer2 instance
29150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param first string, should be normalized
29250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param firstLength length of the first string, or -1 if NUL-terminated
29350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param firstCapacity number of UChars that can be written to first
29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param second string, will be normalized
29550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param secondLength length of the second string, or -1 if NUL-terminated
29650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param pErrorCode Standard ICU error code. Its input value must
29750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                   pass the U_SUCCESS() test, or else the function returns
29850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                   immediately. Check for U_FAILURE() on output or use with
29950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                   function chaining. (See User Guide for details.)
30050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return the length of the first string after appending
30127f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4
30250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
30327f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE int32_t U_EXPORT2
30450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
30550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                UChar *first, int32_t firstLength, int32_t firstCapacity,
30650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                const UChar *second, int32_t secondLength,
30750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                                UErrorCode *pErrorCode);
30850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
30950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Appends the second string to the first string
31050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (merging them at the boundary) and returns the length of the first string.
31150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The result is normalized if both the strings were normalized.
31250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The first and second strings must be different buffers.
31350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param norm2 UNormalizer2 instance
31450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param first string, should be normalized
31550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param firstLength length of the first string, or -1 if NUL-terminated
31650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param firstCapacity number of UChars that can be written to first
31750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param second string, should be normalized
31850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param secondLength length of the second string, or -1 if NUL-terminated
31950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param pErrorCode Standard ICU error code. Its input value must
32050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                   pass the U_SUCCESS() test, or else the function returns
32150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                   immediately. Check for U_FAILURE() on output or use with
32250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                   function chaining. (See User Guide for details.)
32350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return the length of the first string after appending
32427f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4
32550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
32627f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE int32_t U_EXPORT2
32750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_append(const UNormalizer2 *norm2,
32850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho              UChar *first, int32_t firstLength, int32_t firstCapacity,
32950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho              const UChar *second, int32_t secondLength,
33050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho              UErrorCode *pErrorCode);
33150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
33250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
333103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Gets the decomposition mapping of c.
334103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Roughly equivalent to normalizing the string form of c
335103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * on a UNORM2_DECOMPOSE UNormalizer2 instance, but much faster, and except that this function
336103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * returns a negative value and does not write a string
337103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * if c does not have a decomposition mapping in this instance's data.
33827f654740f2a26ad62a5c155af9199af9e69b889claireho * This function is independent of the mode of the UNormalizer2.
33927f654740f2a26ad62a5c155af9199af9e69b889claireho * @param norm2 UNormalizer2 instance
34027f654740f2a26ad62a5c155af9199af9e69b889claireho * @param c code point
34127f654740f2a26ad62a5c155af9199af9e69b889claireho * @param decomposition String buffer which will be set to c's
34227f654740f2a26ad62a5c155af9199af9e69b889claireho *                      decomposition mapping, if there is one.
34327f654740f2a26ad62a5c155af9199af9e69b889claireho * @param capacity number of UChars that can be written to decomposition
34427f654740f2a26ad62a5c155af9199af9e69b889claireho * @param pErrorCode Standard ICU error code. Its input value must
34527f654740f2a26ad62a5c155af9199af9e69b889claireho *                   pass the U_SUCCESS() test, or else the function returns
34627f654740f2a26ad62a5c155af9199af9e69b889claireho *                   immediately. Check for U_FAILURE() on output or use with
34727f654740f2a26ad62a5c155af9199af9e69b889claireho *                   function chaining. (See User Guide for details.)
34827f654740f2a26ad62a5c155af9199af9e69b889claireho * @return the non-negative length of c's decomposition, if there is one; otherwise a negative value
349103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6
35027f654740f2a26ad62a5c155af9199af9e69b889claireho */
35154dcd9b6a06071f647dac967e9e267abb9410720Craig CorneliusU_STABLE int32_t U_EXPORT2
35227f654740f2a26ad62a5c155af9199af9e69b889clairehounorm2_getDecomposition(const UNormalizer2 *norm2,
35327f654740f2a26ad62a5c155af9199af9e69b889claireho                        UChar32 c, UChar *decomposition, int32_t capacity,
35427f654740f2a26ad62a5c155af9199af9e69b889claireho                        UErrorCode *pErrorCode);
35527f654740f2a26ad62a5c155af9199af9e69b889claireho
356103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius/**
357103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Gets the raw decomposition mapping of c.
358103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *
359103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * This is similar to the unorm2_getDecomposition() function but returns the
360103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * raw decomposition mapping as specified in UnicodeData.txt or
361103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * (for custom data) in the mapping files processed by the gennorm2 tool.
362103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * By contrast, unorm2_getDecomposition() returns the processed,
363103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * recursively-decomposed version of this mapping.
364103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *
365103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * When used on a standard NFKC UNormalizer2 instance,
366103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * unorm2_getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
367103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *
368103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * When used on a standard NFC UNormalizer2 instance,
369103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
370103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * in this case, the result contains either one or two code points (=1..4 UChars).
371103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *
372103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * This function is independent of the mode of the UNormalizer2.
373103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @param norm2 UNormalizer2 instance
374103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @param c code point whose raw decomposition mapping is requested
375103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @param decomposition String buffer which will be set to c's
376103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *                      raw decomposition mapping, if there is one.
377103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @param capacity number of UChars that can be written to decomposition
378103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @param pErrorCode Standard ICU error code. Its input value must
379103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *                   pass the U_SUCCESS() test, or else the function returns
380103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *                   immediately. Check for U_FAILURE() on output or use with
381103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *                   function chaining. (See User Guide for details.)
382103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @return the non-negative length of c's raw decomposition, if there is one; otherwise a negative value
3838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @stable ICU 49
384103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius */
3858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_STABLE int32_t U_EXPORT2
386103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusunorm2_getRawDecomposition(const UNormalizer2 *norm2,
387103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                           UChar32 c, UChar *decomposition, int32_t capacity,
388103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                           UErrorCode *pErrorCode);
389103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
390103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius/**
391103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Performs pairwise composition of a and b and returns the composite if there is one.
392103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *
393103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Returns a composite code point c only if c has a two-way mapping to a+b.
394103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * In standard Unicode normalization, this means that
395103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * c has a canonical decomposition to a+b
396103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * and c does not have the Full_Composition_Exclusion property.
397103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius *
398103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * This function is independent of the mode of the UNormalizer2.
399103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @param norm2 UNormalizer2 instance
400103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @param a The first code point of the pair (a normalization starter).
401103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @param b The second code point of the pair.
402103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @return The non-negative composite code point if there is one; otherwise a negative value.
4038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @stable ICU 49
404103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius */
4058393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_STABLE UChar32 U_EXPORT2
406103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusunorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b);
407103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
408103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius/**
409103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Gets the combining class of c.
410103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * The default implementation returns 0
411103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * but all standard implementations return the Unicode Canonical_Combining_Class value.
412103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @param norm2 UNormalizer2 instance
413103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @param c code point whose combining class is requested
414103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @return c's combining class; 0 in the default implementation
4158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @stable ICU 49
416103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius */
4178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_STABLE uint8_t U_EXPORT2
418103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusunorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c);
419103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius
42027f654740f2a26ad62a5c155af9199af9e69b889claireho/**
42150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the string is normalized.
42250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Internally, in cases where unorm2_quickCheck() would return "maybe"
42350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (which is only possible for the two COMPOSE modes) this function
42450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * resolves to "yes" or "no" to provide a definitive result,
42550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * at the cost of doing more work in those cases.
42650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param norm2 UNormalizer2 instance
42750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param s input string
42850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param length length of the string, or -1 if NUL-terminated
42950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param pErrorCode Standard ICU error code. Its input value must
43050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                   pass the U_SUCCESS() test, or else the function returns
43150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                   immediately. Check for U_FAILURE() on output or use with
43250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                   function chaining. (See User Guide for details.)
43350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return TRUE if s is normalized
43427f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4
43550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
43627f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE UBool U_EXPORT2
43750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_isNormalized(const UNormalizer2 *norm2,
43850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    const UChar *s, int32_t length,
43950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    UErrorCode *pErrorCode);
44050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
44150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
44250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Quickly tests if the string is normalized.
44350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For the two COMPOSE modes, the result could be "maybe" in cases that
44450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * would take a little more work to resolve definitively.
44550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Use unorm2_spanQuickCheckYes() and unorm2_normalizeSecondAndAppend() for a faster
44650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * combination of quick check + normalization, to avoid
44750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * re-checking the "yes" prefix.
44850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param norm2 UNormalizer2 instance
44950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param s input string
45050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param length length of the string, or -1 if NUL-terminated
45150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param pErrorCode Standard ICU error code. Its input value must
45250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                   pass the U_SUCCESS() test, or else the function returns
45350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                   immediately. Check for U_FAILURE() on output or use with
45450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                   function chaining. (See User Guide for details.)
45550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return UNormalizationCheckResult: the "yes", "no" or "maybe" quick check outcome
45627f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4
45750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
45827f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE UNormalizationCheckResult U_EXPORT2
45950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_quickCheck(const UNormalizer2 *norm2,
46050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                  const UChar *s, int32_t length,
46150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                  UErrorCode *pErrorCode);
46250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
46350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
46450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Returns the end of the normalized substring of the input string.
46550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * In other words, with <code>end=unorm2_spanQuickCheckYes(norm2, s, length, pErrorCode);</code>
46650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * the prefix of <code>s</code> that ends before index <code>end</code>
46750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * will pass the quick check with a "yes" result.
46850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
46950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The returned end index is usually one or more characters before the
47050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * "no" or "maybe" character: The end index is at a normalization boundary.
47150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (See the C++ Normalizer2 class documentation for more about normalization boundaries.)
47250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
47350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * When the goal is a normalized string and most input strings are expected
47450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * to be normalized already, then call this function,
47550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and if it returns a prefix shorter than the input string,
47650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * copy that prefix and use unorm2_normalizeSecondAndAppend() for the remainder.
47750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param norm2 UNormalizer2 instance
47850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param s input string
47950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param length length of the string, or -1 if NUL-terminated
48050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param pErrorCode Standard ICU error code. Its input value must
48150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                   pass the U_SUCCESS() test, or else the function returns
48250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                   immediately. Check for U_FAILURE() on output or use with
48350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *                   function chaining. (See User Guide for details.)
48450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return "yes" span end index
48527f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4
48650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
48727f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE int32_t U_EXPORT2
48850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
48950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         const UChar *s, int32_t length,
49050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         UErrorCode *pErrorCode);
49150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
49250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
49350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the character always has a normalization boundary before it,
49450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * regardless of context.
49550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details see the C++ Normalizer2 base class documentation.
49650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param norm2 UNormalizer2 instance
49750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param c character (code point) to test
49850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return TRUE if c has a normalization boundary before it
49927f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4
50050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
50127f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE UBool U_EXPORT2
50250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c);
50350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
50450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
50550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the character always has a normalization boundary after it,
50650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * regardless of context.
50750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details see the C++ Normalizer2 base class documentation.
50850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param norm2 UNormalizer2 instance
50950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param c character (code point) to test
51050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return TRUE if c has a normalization boundary after it
51127f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4
51250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
51327f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE UBool U_EXPORT2
51450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c);
51550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
51650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/**
51750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the character is normalization-inert.
51850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details see the C++ Normalizer2 base class documentation.
51950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param norm2 UNormalizer2 instance
52050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param c character (code point) to test
52150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return TRUE if c is normalization-inert
52227f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4
52350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */
52427f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE UBool U_EXPORT2
52550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_isInert(const UNormalizer2 *norm2, UChar32 c);
52650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
52750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif  /* !UCONFIG_NO_NORMALIZATION */
52850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif  /* __UNORM2_H__ */
529