150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/* 250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho******************************************************************************* 350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* 48393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius* Copyright (C) 2009-2013, International Business Machines 550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* Corporation and others. All Rights Reserved. 650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* 750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho******************************************************************************* 850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* file name: unorm2.h 950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* encoding: US-ASCII 1050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* tab size: 8 (not used) 1150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* indentation:4 1250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* 1350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* created on: 2009dec15 1450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* created by: Markus W. Scherer 1550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*/ 1650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 1750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#ifndef __UNORM2_H__ 1850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#define __UNORM2_H__ 1950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 2050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/** 2150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * \file 2250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * \brief C API: New API for Unicode Normalization. 2350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 2450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Unicode normalization functionality for standard Unicode normalization or 2550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * for using custom mapping tables. 2650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * All instances of UNormalizer2 are unmodifiable/immutable. 2750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Instances returned by unorm2_getInstance() are singletons that must not be deleted by the caller. 2850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For more details see the Normalizer2 C++ class. 2950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 3050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 3150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/utypes.h" 3250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/localpointer.h" 3350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/uset.h" 3450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 3550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/** 3650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Constants for normalization modes. 3750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details about standard Unicode normalization forms 3850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and about the algorithms which are also used with custom mapping tables 3950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * see http://www.unicode.org/unicode/reports/tr15/ 4027f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 4150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 4250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehotypedef enum { 4350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 4450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Decomposition followed by composition. 4550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Same as standard NFC when using an "nfc" instance. 4650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Same as standard NFKC when using an "nfkc" instance. 4750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details about standard Unicode normalization forms 4850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * see http://www.unicode.org/unicode/reports/tr15/ 4927f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 5050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 5150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UNORM2_COMPOSE, 5250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 5350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Map, and reorder canonically. 5450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Same as standard NFD when using an "nfc" instance. 5550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Same as standard NFKD when using an "nfkc" instance. 5650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details about standard Unicode normalization forms 5750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * see http://www.unicode.org/unicode/reports/tr15/ 5827f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 5950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 6050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UNORM2_DECOMPOSE, 6150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 6250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * "Fast C or D" form. 6350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * If a string is in this form, then further decomposition <i>without reordering</i> 6450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * would yield the same form as DECOMPOSE. 6550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Text in "Fast C or D" form can be processed efficiently with data tables 6650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * that are "canonically closed", that is, that provide equivalent data for 6750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * equivalent text, without having to be fully normalized. 6850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Not a standard Unicode normalization form. 6950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Not a unique form: Different FCD strings can be canonically equivalent. 7050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details see http://www.unicode.org/notes/tn5/#FCD 7127f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 7250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 7350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UNORM2_FCD, 7450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /** 7550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Compose only contiguously. 7650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Also known as "FCC" or "Fast C Contiguous". 7750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The result will often but not always be in NFC. 7850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The result will conform to FCD which is useful for processing. 7950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Not a standard Unicode normalization form. 8050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details see http://www.unicode.org/notes/tn5/#FCC 8127f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 8250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 8350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UNORM2_COMPOSE_CONTIGUOUS 8450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} UNormalization2Mode; 8550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 8650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/** 8750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Result values for normalization quick check functions. 8850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms 8950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 2.0 9050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 9150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehotypedef enum UNormalizationCheckResult { 9227f654740f2a26ad62a5c155af9199af9e69b889claireho /** 9350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The input string is not in the normalization form. 9450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 2.0 9550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 9650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UNORM_NO, 9727f654740f2a26ad62a5c155af9199af9e69b889claireho /** 9850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The input string is in the normalization form. 9950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 2.0 10050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 10150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UNORM_YES, 10227f654740f2a26ad62a5c155af9199af9e69b889claireho /** 10350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The input string may or may not be in the normalization form. 10450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * This value is only returned for composition forms like NFC and FCC, 10550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * when a backward-combining character is found for which the surrounding text 10650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * would have to be analyzed further. 10750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @stable ICU 2.0 10850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 10950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UNORM_MAYBE 11050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} UNormalizationCheckResult; 11150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 11250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/** 11350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Opaque C service object type for the new normalization API. 11427f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 11550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 11650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostruct UNormalizer2; 11727f654740f2a26ad62a5c155af9199af9e69b889clairehotypedef struct UNormalizer2 UNormalizer2; /**< C typedef for struct UNormalizer2. @stable ICU 4.4 */ 11850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 11950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_NORMALIZATION 12050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 121103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius/** 122103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Returns a UNormalizer2 instance for Unicode NFC normalization. 123103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode). 124103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Returns an unmodifiable singleton instance. Do not delete it. 12554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * @param pErrorCode Standard ICU error code. Its input value must 126103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * pass the U_SUCCESS() test, or else the function returns 127103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * immediately. Check for U_FAILURE() on output or use with 128103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * function chaining. (See User Guide for details.) 129103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @return the requested Normalizer2, if successful 1308393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @stable ICU 49 131103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius */ 1328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_STABLE const UNormalizer2 * U_EXPORT2 133103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusunorm2_getNFCInstance(UErrorCode *pErrorCode); 134103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 135103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius/** 136103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Returns a UNormalizer2 instance for Unicode NFD normalization. 137103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode). 138103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Returns an unmodifiable singleton instance. Do not delete it. 13954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * @param pErrorCode Standard ICU error code. Its input value must 140103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * pass the U_SUCCESS() test, or else the function returns 141103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * immediately. Check for U_FAILURE() on output or use with 142103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * function chaining. (See User Guide for details.) 143103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @return the requested Normalizer2, if successful 1448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @stable ICU 49 145103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius */ 1468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_STABLE const UNormalizer2 * U_EXPORT2 147103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusunorm2_getNFDInstance(UErrorCode *pErrorCode); 148103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 149103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius/** 150103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Returns a UNormalizer2 instance for Unicode NFKC normalization. 151103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode). 152103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Returns an unmodifiable singleton instance. Do not delete it. 15354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * @param pErrorCode Standard ICU error code. Its input value must 154103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * pass the U_SUCCESS() test, or else the function returns 155103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * immediately. Check for U_FAILURE() on output or use with 156103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * function chaining. (See User Guide for details.) 157103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @return the requested Normalizer2, if successful 1588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @stable ICU 49 159103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius */ 1608393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_STABLE const UNormalizer2 * U_EXPORT2 161103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusunorm2_getNFKCInstance(UErrorCode *pErrorCode); 162103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 163103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius/** 164103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Returns a UNormalizer2 instance for Unicode NFKD normalization. 165103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode). 166103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Returns an unmodifiable singleton instance. Do not delete it. 16754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * @param pErrorCode Standard ICU error code. Its input value must 168103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * pass the U_SUCCESS() test, or else the function returns 169103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * immediately. Check for U_FAILURE() on output or use with 170103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * function chaining. (See User Guide for details.) 171103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @return the requested Normalizer2, if successful 1728393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @stable ICU 49 173103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius */ 1748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_STABLE const UNormalizer2 * U_EXPORT2 175103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusunorm2_getNFKDInstance(UErrorCode *pErrorCode); 176103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 177103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius/** 178103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization. 179103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode). 180103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Returns an unmodifiable singleton instance. Do not delete it. 18154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * @param pErrorCode Standard ICU error code. Its input value must 182103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * pass the U_SUCCESS() test, or else the function returns 183103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * immediately. Check for U_FAILURE() on output or use with 184103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * function chaining. (See User Guide for details.) 185103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @return the requested Normalizer2, if successful 1868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @stable ICU 49 187103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius */ 1888393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_STABLE const UNormalizer2 * U_EXPORT2 189103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusunorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode); 190103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 19150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/** 19250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Returns a UNormalizer2 instance which uses the specified data file 19350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) 19450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and which composes or decomposes text according to the specified mode. 19550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Returns an unmodifiable singleton instance. Do not delete it. 19650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 19750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Use packageName=NULL for data files that are part of ICU's own data. 19850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD. 19950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD. 20050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold. 20150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 20250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param packageName NULL for ICU built-in data, otherwise application data package name 20350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file 20450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param mode normalization mode (compose or decompose etc.) 20550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param pErrorCode Standard ICU error code. Its input value must 20650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 20750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 20850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 20950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return the requested UNormalizer2, if successful 21027f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 21150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 21227f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE const UNormalizer2 * U_EXPORT2 21350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_getInstance(const char *packageName, 21450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const char *name, 21550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UNormalization2Mode mode, 21650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *pErrorCode); 21750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 21850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/** 21950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Constructs a filtered normalizer wrapping any UNormalizer2 instance 22050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and a filter set. 22150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Both are aliased and must not be modified or deleted while this object 22250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * is used. 22350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The filter set should be frozen; otherwise the performance will suffer greatly. 22427f654740f2a26ad62a5c155af9199af9e69b889claireho * @param norm2 wrapped UNormalizer2 instance 22550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param filterSet USet which determines the characters to be normalized 22650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param pErrorCode Standard ICU error code. Its input value must 22750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 22850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 22950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 23050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return the requested UNormalizer2, if successful 23127f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 23250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 23327f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE UNormalizer2 * U_EXPORT2 23450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode); 23550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 23650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/** 23750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Closes a UNormalizer2 instance from unorm2_openFiltered(). 23850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Do not close instances from unorm2_getInstance()! 23950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param norm2 UNormalizer2 instance to be closed 24027f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 24150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 24227f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE void U_EXPORT2 24350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_close(UNormalizer2 *norm2); 24450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 24550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if U_SHOW_CPLUSPLUS_API 24650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 24750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_BEGIN 24850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 24950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/** 25050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * \class LocalUNormalizer2Pointer 25150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * "Smart pointer" class, closes a UNormalizer2 via unorm2_close(). 25250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For most methods see the LocalPointerBase base class. 25350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 25450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @see LocalPointerBase 25550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @see LocalPointer 25627f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 25750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 25850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_DEFINE_LOCAL_OPEN_POINTER(LocalUNormalizer2Pointer, UNormalizer2, unorm2_close); 25950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 26050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_NAMESPACE_END 26150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 26250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 26350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 26450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/** 26550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Writes the normalized form of the source string to the destination string 26650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (replacing its contents) and returns the length of the destination string. 26750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The source and destination strings must be different buffers. 26850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param norm2 UNormalizer2 instance 26950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param src source string 27050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param length length of the source string, or -1 if NUL-terminated 27150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param dest destination string; its contents is replaced with normalized src 27250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param capacity number of UChars that can be written to dest 27350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param pErrorCode Standard ICU error code. Its input value must 27450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 27550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 27650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 27750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return dest 27827f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 27950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 28027f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE int32_t U_EXPORT2 28150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_normalize(const UNormalizer2 *norm2, 28250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *src, int32_t length, 28350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *dest, int32_t capacity, 28450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *pErrorCode); 28550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/** 28650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Appends the normalized form of the second string to the first string 28750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (merging them at the boundary) and returns the length of the first string. 28850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The result is normalized if the first string was normalized. 28950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The first and second strings must be different buffers. 29050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param norm2 UNormalizer2 instance 29150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param first string, should be normalized 29250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param firstLength length of the first string, or -1 if NUL-terminated 29350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param firstCapacity number of UChars that can be written to first 29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param second string, will be normalized 29550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param secondLength length of the source string, or -1 if NUL-terminated 29650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param pErrorCode Standard ICU error code. Its input value must 29750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 29850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 29950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 30050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return first 30127f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 30250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 30327f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE int32_t U_EXPORT2 30450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, 30550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *first, int32_t firstLength, int32_t firstCapacity, 30650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *second, int32_t secondLength, 30750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *pErrorCode); 30850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/** 30950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Appends the second string to the first string 31050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (merging them at the boundary) and returns the length of the first string. 31150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The result is normalized if both the strings were normalized. 31250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The first and second strings must be different buffers. 31350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param norm2 UNormalizer2 instance 31450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param first string, should be normalized 31550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param firstLength length of the first string, or -1 if NUL-terminated 31650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param firstCapacity number of UChars that can be written to first 31750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param second string, should be normalized 31850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param secondLength length of the source string, or -1 if NUL-terminated 31950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param pErrorCode Standard ICU error code. Its input value must 32050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 32150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 32250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 32350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return first 32427f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 32550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 32627f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE int32_t U_EXPORT2 32750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_append(const UNormalizer2 *norm2, 32850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *first, int32_t firstLength, int32_t firstCapacity, 32950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *second, int32_t secondLength, 33050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *pErrorCode); 33150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 33250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/** 333103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Gets the decomposition mapping of c. 334103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Roughly equivalent to normalizing the String form of c 335103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * on a UNORM2_DECOMPOSE UNormalizer2 instance, but much faster, and except that this function 336103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * returns a negative value and does not write a string 337103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * if c does not have a decomposition mapping in this instance's data. 33827f654740f2a26ad62a5c155af9199af9e69b889claireho * This function is independent of the mode of the UNormalizer2. 33927f654740f2a26ad62a5c155af9199af9e69b889claireho * @param norm2 UNormalizer2 instance 34027f654740f2a26ad62a5c155af9199af9e69b889claireho * @param c code point 34127f654740f2a26ad62a5c155af9199af9e69b889claireho * @param decomposition String buffer which will be set to c's 34227f654740f2a26ad62a5c155af9199af9e69b889claireho * decomposition mapping, if there is one. 34327f654740f2a26ad62a5c155af9199af9e69b889claireho * @param capacity number of UChars that can be written to decomposition 34427f654740f2a26ad62a5c155af9199af9e69b889claireho * @param pErrorCode Standard ICU error code. Its input value must 34527f654740f2a26ad62a5c155af9199af9e69b889claireho * pass the U_SUCCESS() test, or else the function returns 34627f654740f2a26ad62a5c155af9199af9e69b889claireho * immediately. Check for U_FAILURE() on output or use with 34727f654740f2a26ad62a5c155af9199af9e69b889claireho * function chaining. (See User Guide for details.) 34827f654740f2a26ad62a5c155af9199af9e69b889claireho * @return the non-negative length of c's decomposition, if there is one; otherwise a negative value 349103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.6 35027f654740f2a26ad62a5c155af9199af9e69b889claireho */ 35154dcd9b6a06071f647dac967e9e267abb9410720Craig CorneliusU_STABLE int32_t U_EXPORT2 35227f654740f2a26ad62a5c155af9199af9e69b889clairehounorm2_getDecomposition(const UNormalizer2 *norm2, 35327f654740f2a26ad62a5c155af9199af9e69b889claireho UChar32 c, UChar *decomposition, int32_t capacity, 35427f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode *pErrorCode); 35527f654740f2a26ad62a5c155af9199af9e69b889claireho 356103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius/** 357103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Gets the raw decomposition mapping of c. 358103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * 359103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * This is similar to the unorm2_getDecomposition() function but returns the 360103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * raw decomposition mapping as specified in UnicodeData.txt or 361103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * (for custom data) in the mapping files processed by the gennorm2 tool. 362103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * By contrast, unorm2_getDecomposition() returns the processed, 363103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * recursively-decomposed version of this mapping. 364103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * 365103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * When used on a standard NFKC Normalizer2 instance, 366103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * unorm2_getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property. 367103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * 368103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * When used on a standard NFC Normalizer2 instance, 369103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can); 370103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * in this case, the result contains either one or two code points (=1..4 UChars). 371103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * 372103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * This function is independent of the mode of the UNormalizer2. 373103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @param norm2 UNormalizer2 instance 374103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @param c code point 375103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @param decomposition String buffer which will be set to c's 376103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * raw decomposition mapping, if there is one. 377103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @param capacity number of UChars that can be written to decomposition 378103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @param pErrorCode Standard ICU error code. Its input value must 379103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * pass the U_SUCCESS() test, or else the function returns 380103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * immediately. Check for U_FAILURE() on output or use with 381103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * function chaining. (See User Guide for details.) 382103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @return the non-negative length of c's raw decomposition, if there is one; otherwise a negative value 3838393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @stable ICU 49 384103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius */ 3858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_STABLE int32_t U_EXPORT2 386103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusunorm2_getRawDecomposition(const UNormalizer2 *norm2, 387103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UChar32 c, UChar *decomposition, int32_t capacity, 388103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UErrorCode *pErrorCode); 389103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 390103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius/** 391103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Performs pairwise composition of a & b and returns the composite if there is one. 392103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * 393103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Returns a composite code point c only if c has a two-way mapping to a+b. 394103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * In standard Unicode normalization, this means that 395103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * c has a canonical decomposition to a+b 396103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * and c does not have the Full_Composition_Exclusion property. 397103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * 398103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * This function is independent of the mode of the UNormalizer2. 399103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @param norm2 UNormalizer2 instance 400103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @param a A (normalization starter) code point. 401103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @param b Another code point. 402103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @return The non-negative composite code point if there is one; otherwise a negative value. 4038393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @stable ICU 49 404103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius */ 4058393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_STABLE UChar32 U_EXPORT2 406103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusunorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b); 407103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 408103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius/** 409103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * Gets the combining class of c. 410103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * The default implementation returns 0 411103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * but all standard implementations return the Unicode Canonical_Combining_Class value. 412103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @param norm2 UNormalizer2 instance 413103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @param c code point 414103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @return c's combining class 4158393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * @stable ICU 49 416103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius */ 4178393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusU_STABLE uint8_t U_EXPORT2 418103e9ffba2cba345d0078eb8b8db33249f81840aCraig Corneliusunorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c); 419103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius 42027f654740f2a26ad62a5c155af9199af9e69b889claireho/** 42150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the string is normalized. 42250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Internally, in cases where the quickCheck() method would return "maybe" 42350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (which is only possible for the two COMPOSE modes) this method 42450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * resolves to "yes" or "no" to provide a definitive result, 42550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * at the cost of doing more work in those cases. 42650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param norm2 UNormalizer2 instance 42750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param s input string 42850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param length length of the string, or -1 if NUL-terminated 42950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param pErrorCode Standard ICU error code. Its input value must 43050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 43150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 43250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 43350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return TRUE if s is normalized 43427f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 43550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 43627f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE UBool U_EXPORT2 43750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_isNormalized(const UNormalizer2 *norm2, 43850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *s, int32_t length, 43950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *pErrorCode); 44050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 44150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/** 44250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the string is normalized. 44350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For the two COMPOSE modes, the result could be "maybe" in cases that 44450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * would take a little more work to resolve definitively. 44550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster 44650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * combination of quick check + normalization, to avoid 44750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * re-checking the "yes" prefix. 44850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param norm2 UNormalizer2 instance 44950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param s input string 45050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param length length of the string, or -1 if NUL-terminated 45150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param pErrorCode Standard ICU error code. Its input value must 45250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 45350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 45450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 45550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return UNormalizationCheckResult 45627f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 45750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 45827f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE UNormalizationCheckResult U_EXPORT2 45950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_quickCheck(const UNormalizer2 *norm2, 46050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *s, int32_t length, 46150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *pErrorCode); 46250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 46350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/** 46450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Returns the end of the normalized substring of the input string. 46550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * In other words, with <code>end=spanQuickCheckYes(s, ec);</code> 46650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * the substring <code>UnicodeString(s, 0, end)</code> 46750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * will pass the quick check with a "yes" result. 46850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 46950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The returned end index is usually one or more characters before the 47050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * "no" or "maybe" character: The end index is at a normalization boundary. 47150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * (See the class documentation for more about normalization boundaries.) 47250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * 47350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * When the goal is a normalized string and most input strings are expected 47450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * to be normalized already, then call this method, 47550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * and if it returns a prefix shorter than the input string, 47650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * copy that prefix and use normalizeSecondAndAppend() for the remainder. 47750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param norm2 UNormalizer2 instance 47850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param s input string 47950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param length length of the string, or -1 if NUL-terminated 48050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param pErrorCode Standard ICU error code. Its input value must 48150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * pass the U_SUCCESS() test, or else the function returns 48250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * immediately. Check for U_FAILURE() on output or use with 48350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * function chaining. (See User Guide for details.) 48450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return "yes" span end index 48527f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 48650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 48727f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE int32_t U_EXPORT2 48850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_spanQuickCheckYes(const UNormalizer2 *norm2, 48950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const UChar *s, int32_t length, 49050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *pErrorCode); 49150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 49250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/** 49350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the character always has a normalization boundary before it, 49450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * regardless of context. 49550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details see the Normalizer2 base class documentation. 49650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param norm2 UNormalizer2 instance 49750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param c character to test 49850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return TRUE if c has a normalization boundary before it 49927f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 50050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 50127f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE UBool U_EXPORT2 50250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c); 50350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 50450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/** 50550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the character always has a normalization boundary after it, 50650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * regardless of context. 50750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details see the Normalizer2 base class documentation. 50850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param norm2 UNormalizer2 instance 50950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param c character to test 51050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return TRUE if c has a normalization boundary after it 51127f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 51250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 51327f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE UBool U_EXPORT2 51450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c); 51550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 51650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/** 51750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Tests if the character is normalization-inert. 51850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * For details see the Normalizer2 base class documentation. 51950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param norm2 UNormalizer2 instance 52050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @param c character to test 52150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * @return TRUE if c is normalization-inert 52227f654740f2a26ad62a5c155af9199af9e69b889claireho * @stable ICU 4.4 52350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho */ 52427f654740f2a26ad62a5c155af9199af9e69b889clairehoU_STABLE UBool U_EXPORT2 52550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm2_isInert(const UNormalizer2 *norm2, UChar32 c); 52650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 52750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif /* !UCONFIG_NO_NORMALIZATION */ 52850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif /* __UNORM2_H__ */ 529