10596faeddefbf198de137d5e893708495ab1584cFredrik Roubert// © 2016 and later: Unicode, Inc. and others.
264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html
3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ********************************************************************
5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * COPYRIGHT:
6c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert * Copyright (c) 1996-2015, International Business Machines Corporation and
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * others. All Rights Reserved.
8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ********************************************************************
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifndef NORMLZR_H
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define NORMLZR_H
13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * \file
18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * \brief C++ API: Unicode Normalization
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_NORMALIZATION
22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/chariter.h"
2450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/normalizer2.h"
2550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/unistr.h"
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/unorm.h"
2750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/uobject.h"
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN
30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
31c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert * Old Unicode normalization API.
32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
33c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert * This API has been replaced by the Normalizer2 class and is only available
3450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * for backward compatibility. This class simply delegates to the Normalizer2 class.
3550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * There is one exception: The new API does not provide a replacement for Normalizer::compare().
3650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
37c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert * The Normalizer class supports the standard normalization forms described in
38c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
39c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert * Unicode Standard Annex #15: Unicode Normalization Forms</a>.
40c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert *
41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The Normalizer class consists of two parts:
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - static functions that normalize strings or test if strings are normalized
43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - a Normalizer object is an iterator that takes any kind of text and
44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *   provides iteration over its normalized form
45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The Normalizer class is not suitable for subclassing.
47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For basic information about normalization forms and details about the C API
49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * please see the documentation in unorm.h.
50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The iterator API with the Normalizer constructors and the non-static functions
5250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * use a CharacterIterator as input. It is possible to pass a string which
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is then internally wrapped in a CharacterIterator.
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The input text is not normalized all at once, but incrementally where needed
55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (providing efficient random access).
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This makes it possible to pass in a large text but spend only a small amount of time
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * normalizing a small part of that text.
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * However, if the entire text is normalized, then the iterator will be
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * slower than normalizing the entire text at once and iterating over the result.
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * A possible use of the Normalizer iterator is also to report an index into the
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * original text that is close to where the normalized characters come from.
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <em>Important:</em> The iterator API was cleaned up significantly for ICU 2.0.
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The earlier implementation reported the getIndex() inconsistently,
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and previous() could not be used after setIndex(), next(), first(), and current().
66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Normalizer allows normalization to start from anywhere in the input text by
68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * calling setIndexOnly(), first(), or last().
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Without calling any of these, the iterator will start at the beginning of the text.
70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * At any time, next() returns the next normalized code point (UChar32),
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * with post-increment semantics (like CharacterIterator::next32PostInc()).
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * previous() returns the previous normalized code point (UChar32),
74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * with pre-decrement semantics (like CharacterIterator::previous32()).
75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * current() returns the current code point
77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (respectively the one at the newly set index) without moving
78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the getIndex(). Note that if the text at the current position
79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * needs to be normalized, then current() will do that.
80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (This is why current() is not const.)
81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * It is more efficient to call setIndexOnly() instead, which does not
82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * normalize.
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * getIndex() always refers to the position in the input text where the normalized
85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * code points are returned from. It does not always change with each returned
86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * code point.
87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The code point that is returned from any of the functions
88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * corresponds to text at or after getIndex(), according to the
89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * function's iteration semantics (post-increment or pre-decrement).
90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * next() returns a code point from at or after the getIndex()
92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from before the next() call. After the next() call, the getIndex()
93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * might have moved to where the next code point will be returned from
94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (from a next() or current() call).
95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This is semantically equivalent to array access with array[index++]
96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (post-increment semantics).
97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * previous() returns a code point from at or after the getIndex()
99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from after the previous() call.
100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This is semantically equivalent to array access with array[--index]
101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (pre-decrement semantics).
102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Internally, the Normalizer iterator normalizes a small piece of text
104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * starting at the getIndex() and ending at a following "safe" index.
105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The normalized result is stored in an internal string buffer, and
106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the code points are iterated from there.
107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * With multiple iteration calls, this is repeated until the next piece
108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of text needs to be normalized, and the getIndex() needs to be moved.
109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The following "safe" index, the internal buffer, and the secondary
111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * iteration index into that buffer are not exposed on the API.
112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This also means that it is currently not practical to return to
113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a particular, arbitrary position in the text because one would need to
114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * know, and be able to set, in addition to the getIndex(), at least also the
115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * current index into the internal buffer.
116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * It is currently only possible to observe when getIndex() changes
117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (with careful consideration of the iteration semantics),
118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * at which time the internal index will be 0.
119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For example, if getIndex() is different after next() than before it,
120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * then the internal index is 0 and one can return to this getIndex()
121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * later with setIndexOnly().
122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
12350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Note: While the setIndex() and getIndex() refer to indices in the
12450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * underlying Unicode input text, the next() and previous() methods
12550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * iterate through characters in the normalized output.
12650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * This means that there is not necessarily a one-to-one correspondence
12750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * between characters returned by next() and previous() and the indices
12850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * passed to and returned from setIndex() and getIndex().
12950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * It is for this reason that Normalizer does not implement the CharacterIterator interface.
13050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @author Laura Werner, Mark Davis, Markus Scherer
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.0
133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass U_COMMON_API Normalizer : public UObject {
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic:
136c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert#ifndef U_HIDE_DEPRECATED_API
137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * If DONE is returned from an iteration function that returns a code point,
139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * then there are no more normalization results available.
140c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  enum {
143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      DONE=0xffff
144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  };
145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // Constructors
147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Creates a new <code>Normalizer</code> object for iterating over the
150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * normalized form of a given string.
151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * <p>
152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param str   The string to be normalized.  The normalization
153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *              will start at the beginning of the string.
154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param mode  The normalization mode.
156c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  Normalizer(const UnicodeString& str, UNormalizationMode mode);
159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Creates a new <code>Normalizer</code> object for iterating over the
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * normalized form of a given string.
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * <p>
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param str   The string to be normalized.  The normalization
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *              will start at the beginning of the string.
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param length Length of the string, or -1 if NUL-terminated.
168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param mode  The normalization mode.
169c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
1710596faeddefbf198de137d5e893708495ab1584cFredrik Roubert  Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode);
172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Creates a new <code>Normalizer</code> object for iterating over the
175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * normalized form of the given text.
176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * <p>
177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param iter  The input text to be normalized.  The normalization
178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *              will start at the beginning of the text.
179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param mode  The normalization mode.
181c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
18464339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert#endif  /* U_HIDE_DEPRECATED_API */
185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Copy constructor.
188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param copy The object to be copied.
189c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  Normalizer(const Normalizer& copy);
192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Destructor
195c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  virtual ~Normalizer();
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  //-------------------------------------------------------------------------
201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // Static utility methods
202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  //-------------------------------------------------------------------------
203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
204c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert#ifndef U_HIDE_DEPRECATED_API
205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Normalizes a <code>UnicodeString</code> according to the specified normalization mode.
207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This is a wrapper for unorm_normalize(), using UnicodeStrings.
208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * The <code>options</code> parameter specifies which optional
210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * <code>Normalizer</code> features are to be enabled for this operation.
211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param source    the input string to be normalized.
213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param mode      the normalization mode
214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param options   the optional features to be enabled (0 for no options)
215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param result    The normalized string (on output).
216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param status    The error code.
217c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  static void U_EXPORT2 normalize(const UnicodeString& source,
220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        UNormalizationMode mode, int32_t options,
221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        UnicodeString& result,
222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        UErrorCode &status);
223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Compose a <code>UnicodeString</code>.
226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This is equivalent to normalize() with mode UNORM_NFC or UNORM_NFKC.
227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This is a wrapper for unorm_normalize(), using UnicodeStrings.
228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * The <code>options</code> parameter specifies which optional
230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * <code>Normalizer</code> features are to be enabled for this operation.
231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param source    the string to be composed.
233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param compat    Perform compatibility decomposition before composition.
234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *                  If this argument is <code>FALSE</code>, only canonical
235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *                  decomposition will be performed.
236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param options   the optional features to be enabled (0 for no options)
237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param result    The composed string (on output).
238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param status    The error code.
239c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  static void U_EXPORT2 compose(const UnicodeString& source,
242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      UBool compat, int32_t options,
243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      UnicodeString& result,
244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      UErrorCode &status);
245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Static method to decompose a <code>UnicodeString</code>.
248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This is equivalent to normalize() with mode UNORM_NFD or UNORM_NFKD.
249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This is a wrapper for unorm_normalize(), using UnicodeStrings.
250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * The <code>options</code> parameter specifies which optional
252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * <code>Normalizer</code> features are to be enabled for this operation.
253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param source    the string to be decomposed.
255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param compat    Perform compatibility decomposition.
256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *                  If this argument is <code>FALSE</code>, only canonical
257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *                  decomposition will be performed.
258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param options   the optional features to be enabled (0 for no options)
259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param result    The decomposed string (on output).
260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param status    The error code.
261c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  static void U_EXPORT2 decompose(const UnicodeString& source,
264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        UBool compat, int32_t options,
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        UnicodeString& result,
266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        UErrorCode &status);
267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Performs a quick check on a string to determine whether the string is
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * in a particular normalization format.
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This is a wrapper for unorm_quickCheck(), using a UnicodeString.
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Three types of result can be returned: UNORM_YES, UNORM_NO or
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * UNORM_MAYBE. A UNORM_YES result indicates that the argument
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * string is in the desired normalized format; UNORM_NO indicates that the
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * argument string is not in the desired normalized format. A
277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * UNORM_MAYBE result indicates that a more thorough check is required:
278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * the user may have to put the string in its normalized form and compare the
279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * results.
280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param source       string for determining if it is in a normalized format
281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param mode         normalization format
282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param status A reference to a UErrorCode to receive any errors
283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see isNormalized
286c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  static inline UNormalizationCheckResult
289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Performs a quick check on a string; same as the other version of quickCheck
293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * but takes an extra options parameter like most normalization functions.
294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param source       string for determining if it is in a normalized format
296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param mode         normalization format
297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param options      the optional features to be enabled (0 for no options)
298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param status A reference to a UErrorCode to receive any errors
299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see isNormalized
302c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
30450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho  static UNormalizationCheckResult
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Test if a string is in a given normalization form.
309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This is semantically equivalent to source.equals(normalize(source, mode)).
310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Unlike unorm_quickCheck(), this function returns a definitive result,
312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * never a "maybe".
313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * For NFD, NFKD, and FCD, both functions work exactly the same.
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * For NFC and NFKC where quickCheck may return "maybe", this function will
315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * perform further tests to arrive at a TRUE/FALSE result.
316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param src        String that is to be tested if it is in a normalization format.
318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param mode       Which normalization form to test for.
319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param errorCode  ICU error code in/out parameter.
320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *                   Must fulfill U_SUCCESS before the function call.
321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return Boolean value indicating whether the source string is in the
322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *         "mode" normalization form.
323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see quickCheck
325c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  static inline UBool
328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Test if a string is in a given normalization form; same as the other version of isNormalized
332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * but takes an extra options parameter like most normalization functions.
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param src        String to be tested for being in the given normalization form.
335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param mode       Which normalization form to test for.
336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param options    The optional features to be enabled (0 for no options).
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param errorCode  ICU error code in/out parameter.
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *                   Must fulfill U_SUCCESS before the function call.
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return Boolean value indicating whether the source string is in the
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *         "mode" normalization form.
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see quickCheck
343c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
34550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho  static UBool
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Concatenate normalized strings, making sure that the result is normalized as well.
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * If both the left and the right strings are in
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * the normalization form according to "mode/options",
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * then the result will be
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * \code
356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *     dest=normalize(left+right, mode, options)
357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * \endcode
358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * For details see unorm_concatenate in unorm.h.
360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param left Left source string.
362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param right Right source string.
363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param result The output string.
364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param mode The normalization mode.
365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param options A bit set of normalization options.
366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param errorCode ICU error code in/out parameter.
367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *                  Must fulfill U_SUCCESS before the function call.
368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return result
369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see unorm_concatenate
371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see normalize
372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see unorm_next
373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see unorm_previous
374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
375c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  static UnicodeString &
378b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right,
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              UnicodeString &result,
380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              UNormalizationMode mode, int32_t options,
381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              UErrorCode &errorCode);
382c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert#endif  /* U_HIDE_DEPRECATED_API */
383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Compare two strings for canonical equivalence.
386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Further options include case-insensitive comparison and
387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * code point order (as opposed to code unit order).
388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Canonical equivalence between two strings is defined as their normalized
390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * forms (NFD or NFC) being identical.
391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This function compares strings incrementally instead of normalizing
392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * (and optionally case-folding) both strings entirely,
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * improving performance significantly.
394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Bulk normalization is only necessary if the strings do not fulfill the FCD
396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * conditions. Only in this case, and only if the strings are relatively long,
397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * is memory allocated temporarily.
398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * For FCD strings and short non-FCD strings there is no memory allocation.
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Semantically, this is equivalent to
401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *   strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2)))
402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * where code point order and foldCase are both optional.
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * the case folding must be performed first, then the normalization.
406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param s1 First source string.
408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param s2 Second source string.
409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param options A bit set of options:
411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *     Case-sensitive comparison in code unit order, and the input strings
413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *     are quick-checked for FCD.
414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *   - UNORM_INPUT_IS_FCD
416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *     Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *     If not set, the function will quickCheck for FCD
418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *     and normalize if necessary.
419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *   - U_COMPARE_CODE_POINT_ORDER
421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *     Set to choose code point order instead of code unit order
422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *     (see u_strCompare for details).
423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *   - U_COMPARE_IGNORE_CASE
425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *     Set to compare strings case-insensitively using case folding,
426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *     instead of case-sensitively.
427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *     If set, then the following case folding options are used.
428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *   - Options as used with case-insensitive comparisons, currently:
430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *     - U_FOLD_CASE_EXCLUDE_SPECIAL_I
432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *       (see u_strCaseCompare for details)
433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *   - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param errorCode ICU error code in/out parameter.
437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *                  Must fulfill U_SUCCESS before the function call.
438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return <0 or 0 or >0 as usual for string comparisons
439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see unorm_compare
441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see normalize
442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see UNORM_FCD
443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see u_strCompare
444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see u_strCaseCompare
445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.2
447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  static inline int32_t
449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  compare(const UnicodeString &s1, const UnicodeString &s2,
450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          uint32_t options,
451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          UErrorCode &errorCode);
452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
453c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert#ifndef U_HIDE_DEPRECATED_API
454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  //-------------------------------------------------------------------------
455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // Iteration API
456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  //-------------------------------------------------------------------------
457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Return the current code point in the normalized text.
460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * current() may need to normalize some text at getIndex().
461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * The value returned by getIndex() is not changed.
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return the current normalized code point
464c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar32              current(void);
467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Return the first code point in the normalized text.
470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This is equivalent to setIndexOnly(startIndex()) followed by next().
471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * (Post-increment semantics.)
472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return the first normalized code point
474c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar32              first(void);
477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Return the last code point in the normalized text.
480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This is equivalent to setIndexOnly(endIndex()) followed by previous().
481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * (Pre-decrement semantics.)
482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return the last normalized code point
484c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar32              last(void);
487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Return the next code point in the normalized text.
490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * (Post-increment semantics.)
491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * If the end of the text has already been reached, DONE is returned.
492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * The DONE value could be confused with a U+FFFF non-character code point
493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * in the text. If this is possible, you can test getIndex()<endIndex()
494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * before calling next(), or (getIndex()<endIndex() || last()!=DONE)
495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * after calling next(). (Calling last() will change the iterator state!)
496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * The C API unorm_next() is more efficient and does not have this ambiguity.
498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return the next normalized code point
500c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar32              next(void);
503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Return the previous code point in the normalized text and decrement.
506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * (Pre-decrement semantics.)
507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * If the beginning of the text has already been reached, DONE is returned.
508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * The DONE value could be confused with a U+FFFF non-character code point
509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * in the text. If this is possible, you can test
510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * (getIndex()>startIndex() || first()!=DONE). (Calling first() will change
511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * the iterator state!)
512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * The C API unorm_previous() is more efficient and does not have this ambiguity.
514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return the previous normalized code point
516c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar32              previous(void);
519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Set the iteration position in the input text that is being normalized,
522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * without any immediate normalization.
523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * After setIndexOnly(index), getIndex() will return the same index that was
524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * specified here.
525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param index the desired index in the input text.
527c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  void                 setIndexOnly(int32_t index);
530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Reset the index to the beginning of the text.
533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This is equivalent to setIndexOnly(startIndex()).
534c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  void                reset(void);
537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Retrieve the current iteration position in the input text that is
540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * being normalized.
541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * A following call to next() will return a normalized code point from
543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * the input text at or after this index.
544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * After a call to previous(), getIndex() will point at or before the
546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * position in the input text from which the normalized code point
547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * returned by previous() was taken.
548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return the current index in the input text
550c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t            getIndex(void) const;
553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Retrieve the index of the start of the input text. This is the begin index
556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * of the <code>CharacterIterator</code> or the start (i.e., index 0) of the string
557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * over which this <code>Normalizer</code> is iterating.
558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return the smallest index in the input text at which the Normalizer operates
560c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t            startIndex(void) const;
563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Retrieve the index of the end of the input text. This is the end index
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * of the <code>CharacterIterator</code> or the length of the string
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * over which this <code>Normalizer</code> is iterating.
568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This end index is exclusive, i.e., the Normalizer operates only on characters
569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * before this index.
570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return the exclusive end index: the first index in the input text at which the Normalizer does not operate
572c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t            endIndex(void) const;
575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Returns TRUE when both iterators refer to the same character in the same
578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * input text.
579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param that a Normalizer object to compare this one to
581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return TRUE if both iterators refer to the same character in the same input text
582c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UBool        operator==(const Normalizer& that) const;
585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Returns FALSE when both iterators refer to the same character in the same
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * input text.
589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param that a Normalizer object to compare this one to
591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return FALSE if both iterators refer to the same character in the same input text
592c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  inline UBool        operator!=(const Normalizer& that) const;
595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Returns a pointer to a new Normalizer that is a clone of this one.
598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * The caller is responsible for deleting the new clone.
599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return a pointer to a new Normalizer, to be deleted by the caller
600c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  Normalizer*        clone(void) const;
603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Generates a hash code for this iterator.
606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return the hash code for this iterator
608c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t                hashCode(void) const;
611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  //-------------------------------------------------------------------------
613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // Property access methods
614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  //-------------------------------------------------------------------------
615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Set the normalization mode for this object.
618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * <p>
619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * <b>Note:</b> If the normalization mode is changed while iterating
620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * over a string, calls to {@link #next() } and {@link #previous() } may
621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * return previously buffered characters in the old normalization mode
622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * until the iteration is able to re-sync at the next base character.
623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * It is safest to call {@link #setIndexOnly }, {@link #reset() },
624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * {@link #setText }, {@link #first() },
625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * {@link #last() }, etc. after calling <code>setMode</code>.
626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * <p>
627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param newMode the new mode for this <code>Normalizer</code>.
628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see #getUMode
629c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  void setMode(UNormalizationMode newMode);
632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Return the normalization mode for this object.
635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * The name is getUMode() rather than getMode() because there used to be
637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * a getMode() that returned a different type.
638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return the mode for this <code>Normalizer</code>
640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see #setMode
641c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UNormalizationMode getUMode(void) const;
644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Set options that affect this <code>Normalizer</code>'s operation.
647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Options do not change the basic composition or decomposition operation
648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * that is being performed, but they control whether
649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * certain optional portions of the operation are done.
650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Currently the only available option is obsolete.
651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Multiple options can be specified in one call; they are all turned on or off together.
653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param   option  the option(s) whose value is/are to be set.
655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param   value   the new setting for the option(s).  Use <code>TRUE</code> to
656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *                  turn the option(s) on and <code>FALSE</code> to turn it/them off.
657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see #getOption
659c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  void setOption(int32_t option,
662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         UBool value);
663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Determine whether an option is turned on or off.
666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * If multiple options are specified, then the result is TRUE if any
667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * one of them is set.
668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * <p>
669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param option the option(s) that are to be checked
670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return TRUE if any of the given option(s) is set
671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see #setOption
672c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UBool getOption(int32_t option) const;
675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Set the input text over which this <code>Normalizer</code> will iterate.
678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * The iteration position is set to the beginning.
679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This overload takes the new text as a UnicodeString.
680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param newText a string that replaces the current input text
681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param status a UErrorCode, passed by reference
682c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  void setText(const UnicodeString& newText,
685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru           UErrorCode &status);
686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Set the input text over which this <code>Normalizer</code> will iterate.
689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * The iteration position is set to the beginning.
690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This overload takes the new text as a CharacterIterator (by const reference).
691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param newText a CharacterIterator object that replaces the current input text
692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param status a UErrorCode, passed by reference
693c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  void setText(const CharacterIterator& newText,
696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru           UErrorCode &status);
697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Set the input text over which this <code>Normalizer</code> will iterate.
700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * The iteration position is set to the beginning.
701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This overload takes the new text as a ConstChar16Ptr plus an explicit length.
702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param newText a string (passed as a ConstChar16Ptr) that replaces the current input text
703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param length the length of the string, or -1 if NUL-terminated
704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param status a UErrorCode, passed by reference
705c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
7070596faeddefbf198de137d5e893708495ab1584cFredrik Roubert  void setText(ConstChar16Ptr newText,
708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    int32_t length,
709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UErrorCode &status);
710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Copies the input text into the UnicodeString argument.
712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Nothing is returned; the copy is delivered only through <code>result</code>.
713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param result Receives a copy of the text under iteration.
714c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  void            getText(UnicodeString&  result);
717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * ICU "poor man's RTTI", returns a UClassID for this class.
720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return a UClassID for this class.
721c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  static UClassID U_EXPORT2 getStaticClassID();
724c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert#endif  /* U_HIDE_DEPRECATED_API */
725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * ICU "poor man's RTTI", returns a UClassID for the actual class.
   * NOTE(review): this virtual method is declared after the
   * U_HIDE_DEPRECATED_API #endif, so it presumably stays visible even when
   * the deprecated API is hidden -- confirm against the matching #ifndef.
728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return a UClassID for the actual class.
729c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert   * @deprecated ICU 56 Use Normalizer2 instead.
730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  virtual UClassID getDynamicClassID() const;
732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate:
734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  //-------------------------------------------------------------------------
735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // Private functions
736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  //-------------------------------------------------------------------------
737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
  // Both members below are declared in the private section and, per their
  // comments, have no implementation: they exist only as declarations.
738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  Normalizer(); // default constructor: declared only, not implemented
739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  Normalizer &operator=(const Normalizer &that); // assignment operator: declared only, not implemented
740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // Private utility methods for iteration.
742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // They are not documented in this header; for documentation, see the source code.
743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UBool nextNormalize();
744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UBool previousNormalize();
745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
  // NOTE(review): init() and clearBuffer() carry no description here either;
  // their behavior has to be confirmed in the implementation.
74650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho  void    init();
747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  void    clearBuffer(void);
748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  //-------------------------------------------------------------------------
750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // Private data
751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  //-------------------------------------------------------------------------
752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
75350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho  FilteredNormalizer2*fFilteredNorm2;  // owned if not NULL
75450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho  const Normalizer2  *fNorm2;  // not owned; may be equal to fFilteredNorm2
75564339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert  UNormalizationMode  fUMode;  // deprecated
756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t             fOptions;  // NOTE(review): presumably the value managed by setOption()/getOption() -- confirm
757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // The input text and our position in it
75950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho  CharacterIterator  *text;
760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // The normalization buffer is the result of normalization
762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // of the source in the half-open range [currentIndex..nextIndex[ (nextIndex excluded).
763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t         currentIndex, nextIndex;
764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // A buffer for holding intermediate results
766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UnicodeString       buffer;
767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t         bufferPos;  // NOTE(review): presumably the current position within buffer -- confirm
768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru};
769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------
771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Inline implementations
772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------
773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
774c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert#ifndef U_HIDE_DEPRECATED_API
// Inequality operator: returns the logical negation of operator==(other).
775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool
776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::operator!= (const Normalizer& other) const
777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ return ! operator==(other); }
778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Three-argument convenience overload: forwards source, mode and status to
// the four-argument quickCheck(), passing 0 as the third argument and
// returning its result unchanged.
// NOTE(review): the third argument is presumably the options value; the
// four-argument declaration is outside this excerpt -- confirm.
779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UNormalizationCheckResult
780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::quickCheck(const UnicodeString& source,
781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       UNormalizationMode mode,
782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       UErrorCode &status) {
78350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return quickCheck(source, mode, 0, status);
784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Three-argument convenience overload: forwards source, mode and status to
// the four-argument isNormalized(), passing 0 as the third argument and
// returning its result unchanged.
// NOTE(review): the third argument is presumably the options value; the
// four-argument declaration is outside this excerpt -- confirm.
786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool
787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::isNormalized(const UnicodeString& source,
788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         UNormalizationMode mode,
789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         UErrorCode &status) {
79050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return isNormalized(source, mode, 0, status);
791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
792c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert#endif  /* U_HIDE_DEPRECATED_API */
793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Thin inline wrapper around unorm_compare(): passes each string's buffer
// (via toUCharPtr(getBuffer())) and length, the options value, and the
// address of errorCode, and returns unorm_compare()'s result unchanged.
794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline int32_t
795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    uint32_t options,
797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UErrorCode &errorCode) {
798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // No validation happens here: all argument checking is done in unorm_compare
7990596faeddefbf198de137d5e893708495ab1584cFredrik Roubert  return unorm_compare(toUCharPtr(s1.getBuffer()), s1.length(),
8000596faeddefbf198de137d5e893708495ab1584cFredrik Roubert                       toUCharPtr(s2.getBuffer()), s2.length(),
801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       options,
802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       &errorCode);
803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END
806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_NORMALIZATION */
808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif // NORMLZR_H
810