1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ********************************************************************
3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * COPYRIGHT:
4b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Copyright (c) 1996-2011, International Business Machines Corporation and
5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * others. All Rights Reserved.
6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ********************************************************************
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifndef NORMLZR_H
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define NORMLZR_H
11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * \file
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * \brief C++ API: Unicode Normalization
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_NORMALIZATION
20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/chariter.h"
2250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/normalizer2.h"
2350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/unistr.h"
24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/unorm.h"
2550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/uobject.h"
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The Normalizer class supports the standard normalization forms described in
30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Unicode Standard Annex #15: Unicode Normalization Forms</a>.
32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
3350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Note: This API has been replaced by the Normalizer2 class and is only available
3450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * for backward compatibility. This class simply delegates to the Normalizer2 class.
3550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * There is one exception: The new API does not provide a replacement for Normalizer::compare().
3650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The Normalizer class consists of two parts:
38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - static functions that normalize strings or test if strings are normalized
39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * - a Normalizer object is an iterator that takes any kind of text and
40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *   provides iteration over its normalized form
41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The Normalizer class is not suitable for subclassing.
43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For basic information about normalization forms and details about the C API
45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * please see the documentation in unorm.h.
46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The iterator API with the Normalizer constructors and the non-static functions
4850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * use a CharacterIterator as input. It is possible to pass a string which
49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is then internally wrapped in a CharacterIterator.
50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The input text is not normalized all at once, but incrementally where needed
51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (providing efficient random access).
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This allows passing in a large text while spending only a small amount of time
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * normalizing a small part of that text.
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * However, if the entire text is normalized, then the iterator will be
55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * slower than normalizing the entire text at once and iterating over the result.
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * A possible use of the Normalizer iterator is also to report an index into the
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * original text that is close to where the normalized characters come from.
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <em>Important:</em> The iterator API was cleaned up significantly for ICU 2.0.
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The earlier implementation reported the getIndex() inconsistently,
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and previous() could not be used after setIndex(), next(), first(), and current().
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Normalizer allows you to start normalizing from anywhere in the input text by
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * calling setIndexOnly(), first(), or last().
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Without calling any of these, the iterator will start at the beginning of the text.
66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * At any time, next() returns the next normalized code point (UChar32),
68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * with post-increment semantics (like CharacterIterator::next32PostInc()).
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * previous() returns the previous normalized code point (UChar32),
70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * with pre-decrement semantics (like CharacterIterator::previous32()).
71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * current() returns the current code point
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (respectively the one at the newly set index) without moving
74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the getIndex(). Note that if the text at the current position
75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * needs to be normalized, then these functions will do that.
76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (This is why current() is not const.)
77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * It is more efficient to call setIndexOnly() instead, which does not
78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * normalize.
79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * getIndex() always refers to the position in the input text where the normalized
81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * code points are returned from. It does not always change with each returned
82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * code point.
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The code point that is returned from any of the functions
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * corresponds to text at or after getIndex(), according to the
85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * function's iteration semantics (post-increment or pre-decrement).
86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * next() returns a code point from at or after the getIndex()
88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from before the next() call. After the next() call, the getIndex()
89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * might have moved to where the next code point will be returned from
90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (from a next() or current() call).
91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This is semantically equivalent to array access with array[index++]
92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (post-increment semantics).
93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * previous() returns a code point from at or after the getIndex()
95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from after the previous() call.
96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This is semantically equivalent to array access with array[--index]
97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (pre-decrement semantics).
98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Internally, the Normalizer iterator normalizes a small piece of text
100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * starting at the getIndex() and ending at a following "safe" index.
101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The normalized result is stored in an internal string buffer, and
102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the code points are iterated from there.
103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * With multiple iteration calls, this is repeated until the next piece
104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of text needs to be normalized, and the getIndex() needs to be moved.
105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The following "safe" index, the internal buffer, and the secondary
107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * iteration index into that buffer are not exposed on the API.
108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This also means that it is currently not practical to return to
109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a particular, arbitrary position in the text because one would need to
110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * know, and be able to set, in addition to the getIndex(), at least also the
111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * current index into the internal buffer.
112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * It is currently only possible to observe when getIndex() changes
113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (with careful consideration of the iteration semantics),
114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * at which time the internal index will be 0.
115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * For example, if getIndex() is different after next() than before it,
116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * then the internal index is 0 and one can return to this getIndex()
117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * later with setIndexOnly().
118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
11950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Note: While the setIndex() and getIndex() refer to indices in the
12050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * underlying Unicode input text, the next() and previous() methods
12150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * iterate through characters in the normalized output.
12250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * This means that there is not necessarily a one-to-one correspondence
12350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * between characters returned by next() and previous() and the indices
12450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * passed to and returned from setIndex() and getIndex().
12550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * It is for this reason that Normalizer does not implement the CharacterIterator interface.
12650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *
127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @author Laura Werner, Mark Davis, Markus Scherer
128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @stable ICU 2.0
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass U_COMMON_API Normalizer : public UObject {
131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic:
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * If DONE is returned from an iteration function that returns a code point,
134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * then there are no more normalization results available.
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  enum {
138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      DONE=0xffff
139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  };
140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // Constructors
142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Creates a new <code>Normalizer</code> object for iterating over the
145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * normalized form of a given string.
146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * <p>
147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param str   The string to be normalized.  The normalization
148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *              will start at the beginning of the string.
149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param mode  The normalization mode.
151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  Normalizer(const UnicodeString& str, UNormalizationMode mode);
154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Creates a new <code>Normalizer</code> object for iterating over the
157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * normalized form of a given string.
158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * <p>
159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param str   The string to be normalized.  The normalization
160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *              will start at the beginning of the string.
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param length Length of the string, or -1 if NUL-terminated.
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param mode  The normalization mode.
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);
167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Creates a new <code>Normalizer</code> object for iterating over the
170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * normalized form of the given text.
171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * <p>
172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param iter  The input text to be normalized.  The normalization
173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *              will start at the beginning of the text.
174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param mode  The normalization mode.
176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Copy constructor.
182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param copy The object to be copied.
183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  Normalizer(const Normalizer& copy);
186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Destructor
189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  virtual ~Normalizer();
192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  //-------------------------------------------------------------------------
195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // Static utility methods
196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  //-------------------------------------------------------------------------
197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Normalizes a <code>UnicodeString</code> according to the specified normalization mode.
200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This is a wrapper for unorm_normalize(), using UnicodeStrings.
201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * The <code>options</code> parameter specifies which optional
203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * <code>Normalizer</code> features are to be enabled for this operation.
204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param source    the input string to be normalized.
206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param mode      the normalization mode
207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param options   the optional features to be enabled (0 for no options)
208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param result    The normalized string (on output).
209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param status    The error code.
210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  static void U_EXPORT2 normalize(const UnicodeString& source,
213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        UNormalizationMode mode, int32_t options,
214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        UnicodeString& result,
215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        UErrorCode &status);
216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Compose a <code>UnicodeString</code>.
219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This is equivalent to normalize() with mode UNORM_NFC or UNORM_NFKC.
220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This is a wrapper for unorm_normalize(), using UnicodeStrings.
221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * The <code>options</code> parameter specifies which optional
223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * <code>Normalizer</code> features are to be enabled for this operation.
224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param source    the string to be composed.
226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param compat    Perform compatibility decomposition before composition.
227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *                  If this argument is <code>FALSE</code>, only canonical
228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *                  decomposition will be performed.
229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param options   the optional features to be enabled (0 for no options)
230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param result    The composed string (on output).
231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param status    The error code.
232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  static void U_EXPORT2 compose(const UnicodeString& source,
235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      UBool compat, int32_t options,
236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      UnicodeString& result,
237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      UErrorCode &status);
238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Static method to decompose a <code>UnicodeString</code>.
241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This is equivalent to normalize() with mode UNORM_NFD or UNORM_NFKD.
242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This is a wrapper for unorm_normalize(), using UnicodeStrings.
243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * The <code>options</code> parameter specifies which optional
245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * <code>Normalizer</code> features are to be enabled for this operation.
246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param source    the string to be decomposed.
248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param compat    Perform compatibility decomposition.
249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *                  If this argument is <code>FALSE</code>, only canonical
250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *                  decomposition will be performed.
251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param options   the optional features to be enabled (0 for no options)
252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param result    The decomposed string (on output).
253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param status    The error code.
254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  static void U_EXPORT2 decompose(const UnicodeString& source,
257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        UBool compat, int32_t options,
258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        UnicodeString& result,
259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        UErrorCode &status);
260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Performs a quick check on a string to determine whether the string is
263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * in a particular normalization format.
264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This is a wrapper for unorm_quickCheck(), using a UnicodeString.
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Three types of result can be returned: UNORM_YES, UNORM_NO or
267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * UNORM_MAYBE. A UNORM_YES result indicates that the argument
268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * string is in the desired normalized format; UNORM_NO indicates that the
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * argument string is not in the desired normalized format. A
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * UNORM_MAYBE result indicates that a more thorough check is required;
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * the user may have to put the string in its normalized form and compare the
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * results.
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param source       string for determining if it is in a normalized format
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param mode         normalization format
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param status A reference to a UErrorCode to receive any errors
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see isNormalized
279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  static inline UNormalizationCheckResult
282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Performs a quick check on a string; same as the other version of quickCheck
286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * but takes an extra options parameter like most normalization functions.
287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param source       string for determining if it is in a normalized format
289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param mode         normalization format
290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param options      the optional features to be enabled (0 for no options)
291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param status A reference to a UErrorCode to receive any errors
292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see isNormalized
295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.6
296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
29750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho  static UNormalizationCheckResult
298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Test if a string is in a given normalization form.
302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This is semantically equivalent to source.equals(normalize(source, mode)) .
303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Unlike unorm_quickCheck(), this function returns a definitive result,
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * never a "maybe".
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * For NFD, NFKD, and FCD, both functions work exactly the same.
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * For NFC and NFKC where quickCheck may return "maybe", this function will
308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * perform further tests to arrive at a TRUE/FALSE result.
309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param src        String that is to be tested if it is in a normalization format.
311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param mode       Which normalization form to test for.
312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param errorCode  ICU error code in/out parameter.
313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *                   Must fulfill U_SUCCESS before the function call.
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return Boolean value indicating whether the source string is in the
315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *         "mode" normalization form.
316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see quickCheck
318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.2
319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  static inline UBool
321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Test if a string is in a given normalization form; same as the other version of isNormalized
325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * but takes an extra options parameter like most normalization functions.
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param src        String to be tested for whether it is in the given normalization form.
328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param mode       Which normalization form to test for.
329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param options      the optional features to be enabled (0 for no options)
330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param errorCode  ICU error code in/out parameter.
331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *                   Must fulfill U_SUCCESS before the function call.
332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return Boolean value indicating whether the source string is in the
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *         "mode" normalization form.
334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see quickCheck
336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.6
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
33850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho  static UBool
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Concatenate normalized strings, making sure that the result is normalized as well.
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * If both the left and the right strings are in
345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * the normalization form according to "mode/options",
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * then the result will be
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * \code
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *     dest=normalize(left+right, mode, options)
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * \endcode
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * For details see unorm_concatenate in unorm.h.
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param left Left source string.
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param right Right source string.
356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param result The output string.
357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param mode The normalization mode.
358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param options A bit set of normalization options.
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param errorCode ICU error code in/out parameter.
360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *                   Must fulfill U_SUCCESS before the function call.
361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return result
362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see unorm_concatenate
364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see normalize
365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see unorm_next
366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see unorm_previous
367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.1
369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  static UnicodeString &
371b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho  U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right,
372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              UnicodeString &result,
373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              UNormalizationMode mode, int32_t options,
374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru              UErrorCode &errorCode);
375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Compare two strings for canonical equivalence.
378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Further options include case-insensitive comparison and
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * code point order (as opposed to code unit order).
380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Canonical equivalence between two strings is defined as their normalized
382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * forms (NFD or NFC) being identical.
383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This function compares strings incrementally instead of normalizing
384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * (and optionally case-folding) both strings entirely,
385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * improving performance significantly.
386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Bulk normalization is only necessary if the strings do not fulfill the FCD
388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * conditions. Only in this case, and only if the strings are relatively long,
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * is memory allocated temporarily.
390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * For FCD strings and short non-FCD strings there is no memory allocation.
391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Semantically, this is equivalent to
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *   strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2)))
394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * where code point order and foldCase are all optional.
395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * the case folding must be performed first, then the normalization.
398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param s1 First source string.
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param s2 Second source string.
401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param options A bit set of options:
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *     Case-sensitive comparison in code unit order, and the input strings
405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *     are quick-checked for FCD.
406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *   - UNORM_INPUT_IS_FCD
408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *     Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *     If not set, the function will quickCheck for FCD
410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *     and normalize if necessary.
411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *   - U_COMPARE_CODE_POINT_ORDER
413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *     Set to choose code point order instead of code unit order
414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *     (see u_strCompare for details).
415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *   - U_COMPARE_IGNORE_CASE
417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *     Set to compare strings case-insensitively using case folding,
418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *     instead of case-sensitively.
419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *     If set, then the following case folding options are used.
420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *   - Options as used with case-insensitive comparisons, currently:
422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *    (see u_strCaseCompare for details)
425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *   - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param errorCode ICU error code in/out parameter.
429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *                  Must fulfill U_SUCCESS before the function call.
430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return <0 or 0 or >0 as usual for string comparisons
431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see unorm_compare
433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see normalize
434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see UNORM_FCD
435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see u_strCompare
436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see u_strCaseCompare
437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.2
439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  static inline int32_t
441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  compare(const UnicodeString &s1, const UnicodeString &s2,
442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          uint32_t options,
443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          UErrorCode &errorCode);
444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  //-------------------------------------------------------------------------
446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // Iteration API
447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  //-------------------------------------------------------------------------
448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Return the current character in the normalized text.
451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * current() may need to normalize some text at getIndex().
452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * The getIndex() is not changed.
453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return the current normalized code point
455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar32              current(void);
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Return the first character in the normalized text.
461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This is equivalent to setIndexOnly(startIndex()) followed by next().
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * (Post-increment semantics.)
463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return the first normalized code point
465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar32              first(void);
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Return the last character in the normalized text.
471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This is equivalent to setIndexOnly(endIndex()) followed by previous().
472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * (Pre-decrement semantics.)
473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return the last normalized code point
475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar32              last(void);
478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Return the next character in the normalized text.
481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * (Post-increment semantics.)
482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * If the end of the text has already been reached, DONE is returned.
483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * The DONE value could be confused with a U+FFFF non-character code point
484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * in the text. If this is possible, you can test getIndex()<endIndex()
485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * before calling next(), or (getIndex()<endIndex() || last()!=DONE)
486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * after calling next(). (Calling last() will change the iterator state!)
487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * The C API unorm_next() is more efficient and does not have this ambiguity.
489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return the next normalized code point
491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar32              next(void);
494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Return the previous character in the normalized text and decrement.
497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * (Pre-decrement semantics.)
498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * If the beginning of the text has already been reached, DONE is returned.
499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * The DONE value could be confused with a U+FFFF non-character code point
500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * in the text. If this is possible, you can test
501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * (getIndex()>startIndex() || first()!=DONE). (Calling first() will change
502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * the iterator state!)
503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * The C API unorm_previous() is more efficient and does not have this ambiguity.
505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return the previous normalized code point
507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UChar32              previous(void);
510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Set the iteration position in the input text that is being normalized,
513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * without any immediate normalization.
514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * After setIndexOnly(), getIndex() will return the same index that is
515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * specified here.
516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param index the desired index in the input text.
518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  void                 setIndexOnly(int32_t index);
521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Reset the index to the beginning of the text.
524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This is equivalent to setIndexOnly(startIndex()).
525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  void                reset(void);
528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Retrieve the current iteration position in the input text that is
531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * being normalized.
532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * A following call to next() will return a normalized code point from
534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * the input text at or after this index.
535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * After a call to previous(), getIndex() will point at or before the
537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * position in the input text from which previous() returned the
538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * normalized code point.
539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return the current index in the input text
541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t            getIndex(void) const;
544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Retrieve the index of the start of the input text. This is the begin index
547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * of the <code>CharacterIterator</code> or the start (i.e. index 0) of the string
548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * over which this <code>Normalizer</code> is iterating.
549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return the smallest index in the input text where the Normalizer operates
551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t            startIndex(void) const;
554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Retrieve the index of the end of the input text. This is the end index
557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * of the <code>CharacterIterator</code> or the length of the string
558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * over which this <code>Normalizer</code> is iterating.
559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This end index is exclusive, i.e., the Normalizer operates only on characters
560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * before this index.
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return the first index in the input text where the Normalizer does not operate
563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t            endIndex(void) const;
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Returns TRUE when both iterators refer to the same character in the same
569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * input text.
570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param that a Normalizer object to compare this one to
572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return comparison result
573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UBool        operator==(const Normalizer& that) const;
576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Returns FALSE when both iterators refer to the same character in the same
579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * input text.
580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param that a Normalizer object to compare this one to
582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return comparison result
583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  inline UBool        operator!=(const Normalizer& that) const;
586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Returns a pointer to a new Normalizer that is a clone of this one.
589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * The caller is responsible for deleting the new clone.
590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return a pointer to a new Normalizer
591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  Normalizer*        clone(void) const;
594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Generates a hash code for this iterator.
597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return the hash code
599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t                hashCode(void) const;
602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  //-------------------------------------------------------------------------
604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // Property access methods
605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  //-------------------------------------------------------------------------
606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Set the normalization mode for this object.
609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * <p>
610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * <b>Note:</b> If the normalization mode is changed while iterating
611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * over a string, calls to {@link #next() } and {@link #previous() } may
612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * return previously buffered characters in the old normalization mode
613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * until the iteration is able to re-sync at the next base character.
614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * It is safest to call {@link #setIndexOnly }, {@link #reset() },
615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * {@link #setText }, {@link #first() },
616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * {@link #last() }, etc. after calling <code>setMode</code>.
617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * <p>
618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param newMode the new mode for this <code>Normalizer</code>.
619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see #getUMode
620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  void setMode(UNormalizationMode newMode);
623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Return the normalization mode for this object.
626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * This is an unusual name because there used to be a getMode() that
628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * returned a different type.
629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return the mode for this <code>Normalizer</code>
631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see #setMode
632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UNormalizationMode getUMode(void) const;
635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Set options that affect this <code>Normalizer</code>'s operation.
638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Options do not change the basic composition or decomposition operation
639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * that is being performed, but they control whether
640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * certain optional portions of the operation are done.
641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Currently the only available option is obsolete.
642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Several options may be given at once; all of them are turned on or off together.
644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param   option  the option(s) whose value is/are to be set.
646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param   value   the new setting for the option(s).  Use <code>TRUE</code> to
647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *                  turn the option(s) on and <code>FALSE</code> to turn it/them off.
648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see #getOption
650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  void setOption(int32_t option,
653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru         UBool value);
654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Determine whether an option is turned on or off.
657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * If multiple options are specified, then the result is TRUE if any
658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * of them is set.
659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param option the option(s) to be checked
661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return TRUE if any of the given option(s) is set
662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @see #setOption
663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UBool getOption(int32_t option) const;
666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Set the input text over which this <code>Normalizer</code> will iterate
669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * (UnicodeString overload). The iteration position is set to the beginning.
670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param newText a string that replaces the current input text
672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param status a UErrorCode, passed by reference
673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  void setText(const UnicodeString& newText,
676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru           UErrorCode &status);
677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Set the input text over which this <code>Normalizer</code> will iterate
680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * (CharacterIterator overload). The iteration position is set to the beginning.
681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param newText a CharacterIterator object that replaces the current input text
683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param status a UErrorCode, passed by reference
684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  void setText(const CharacterIterator& newText,
687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru           UErrorCode &status);
688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Set the input text over which this <code>Normalizer</code> will iterate
691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * (UChar array overload). The iteration position is set to the beginning.
692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param newText a string that replaces the current input text
694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param length the length of the string, or -1 if NUL-terminated
695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param status a UErrorCode, passed by reference
696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  void setText(const UChar* newText,
699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    int32_t length,
700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UErrorCode &status);
701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * Copy the input text into the UnicodeString argument.
703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   *
704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @param result receives a copy of the text under iteration.
705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.0
706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
  // NOTE(review): unlike getUMode()/getOption(), this accessor is not
  // declared const -- confirm whether that is required by the implementation.
707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  void            getText(UnicodeString&  result);
708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * ICU "poor man's RTTI", returns a UClassID for this class.
711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return a UClassID for this class.
712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.2
713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  static UClassID U_EXPORT2 getStaticClassID();
715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  /**
717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * ICU "poor man's RTTI", returns a UClassID for the actual class of
   * this object (the method is virtual).
718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @return a UClassID for the actual class.
719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   * @stable ICU 2.2
720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   */
721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  virtual UClassID getDynamicClassID() const;
722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate:
724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  //-------------------------------------------------------------------------
725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // Private functions
726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  //-------------------------------------------------------------------------
727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  Normalizer(); // default constructor: declared only, not implemented
729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  Normalizer &operator=(const Normalizer &that); // assignment operator: declared only, not implemented
730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // Private utility methods for iteration.
732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // They are documented in the implementation source, not in this header.
733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UBool nextNormalize();
734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UBool previousNormalize();
735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
73650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho  void    init();
737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  void    clearBuffer(void);
738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  //-------------------------------------------------------------------------
740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // Private data
741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  //-------------------------------------------------------------------------
742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
74350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho  FilteredNormalizer2*fFilteredNorm2;  // owned if not NULL
74450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho  const Normalizer2  *fNorm2;  // not owned; may be equal to fFilteredNorm2
745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UNormalizationMode  fUMode;  // same type as used by setMode()/getUMode()
746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t             fOptions;  // presumably the value handled by setOption()/getOption() -- confirm in the implementation
747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // The input text and our position in it
74950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho  CharacterIterator  *text;
750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // The normalization buffer is the result of normalizing the source text
752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // in the half-open index range [currentIndex, nextIndex).
753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t         currentIndex, nextIndex;
754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // A buffer for holding intermediate results
756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  UnicodeString       buffer;
757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  int32_t         bufferPos;  // presumably the current position within buffer -- confirm in the implementation
758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru};
759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------
761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Inline implementations
762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------
763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Inequality: returns the logical negation of operator==(other), so the two
// operators always give opposite answers for the same argument.
764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool
765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::operator!= (const Normalizer& other) const
766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ return ! operator==(other); }
767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Three-argument convenience overload: forwards source, mode and status to
// the four-argument quickCheck() with 0 as the third argument (presumably the
// options value; that overload is declared outside this part of the file)
// and returns its result unchanged.
768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UNormalizationCheckResult
769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::quickCheck(const UnicodeString& source,
770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       UNormalizationMode mode,
771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       UErrorCode &status) {
77250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return quickCheck(source, mode, 0, status);
773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Three-argument convenience overload: forwards source, mode and status to
// the four-argument isNormalized() with 0 as the third argument (presumably
// the options value; that overload is declared outside this part of the
// file) and returns its result unchanged.
775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline UBool
776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::isNormalized(const UnicodeString& source,
777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         UNormalizationMode mode,
778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         UErrorCode &status) {
77950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return isNormalized(source, mode, 0, status);
780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Thin wrapper over unorm_compare(): passes each string's getBuffer() pointer
// and length(), the options value as given, and the address of errorCode,
// then returns unorm_compare()'s result directly.
782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline int32_t
783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    uint32_t options,
785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UErrorCode &errorCode) {
786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  // No checks are made here: all argument checking is done in unorm_compare.
787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru  return unorm_compare(s1.getBuffer(), s1.length(),
788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       s2.getBuffer(), s2.length(),
789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       options,
790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       &errorCode);
791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END
794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_NORMALIZATION */
796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif // NORMLZR_H
798