1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/*
2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *************************************************************************
3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * COPYRIGHT:
4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Copyright (c) 1996-2010, International Business Machines Corporation and
5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * others. All Rights Reserved.
6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *************************************************************************
7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h"
10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_NORMALIZATION
12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uniset.h"
14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/unistr.h"
15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/chariter.h"
16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/schriter.h"
17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uchriter.h"
18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/normlzr.h"
19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cmemory.h"
20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "normalizer2impl.h"
21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uprops.h"  // for uniset_getUnicode32Instance()
22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_BEGIN
24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer)
26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------
28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Constructors and other boilerplate
29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------
30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode) :
32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    text(new StringCharacterIterator(str)),
34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    currentIndex(0), nextIndex(0),
35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    buffer(), bufferPos(0)
36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    init();
38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::Normalizer(const UChar *str, int32_t length, UNormalizationMode mode) :
41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    text(new UCharCharacterIterator(str, length)),
43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    currentIndex(0), nextIndex(0),
44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    buffer(), bufferPos(0)
45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    init();
47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode) :
50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    text(iter.clone()),
52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    currentIndex(0), nextIndex(0),
53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    buffer(), bufferPos(0)
54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    init();
56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::Normalizer(const Normalizer &copy) :
59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UObject(copy), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(copy.fUMode), fOptions(copy.fOptions),
60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    text(copy.text->clone()),
61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    currentIndex(copy.currentIndex), nextIndex(copy.nextIndex),
62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    buffer(copy.buffer), bufferPos(copy.bufferPos)
63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    init();
65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const UChar _NUL=0;
68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void
70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::init() {
71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode errorCode=U_ZERO_ERROR;
72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fNorm2=Normalizer2Factory::getInstance(fUMode, errorCode);
73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(fOptions&UNORM_UNICODE_3_2) {
74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        delete fFilteredNorm2;
75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fNorm2=fFilteredNorm2=
76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            new FilteredNormalizer2(*fNorm2, *uniset_getUnicode32Instance(errorCode));
77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_FAILURE(errorCode)) {
79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        errorCode=U_ZERO_ERROR;
80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fNorm2=Normalizer2Factory::getNoopInstance(errorCode);
81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::~Normalizer()
85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete fFilteredNorm2;
87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete text;
88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer*
91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::clone() const
92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return new Normalizer(*this);
94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Generates a hash code for this iterator.
98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t Normalizer::hashCode() const
100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return text->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex;
102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool Normalizer::operator==(const Normalizer& that) const
105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return
107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        this==&that ||
108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        (fUMode==that.fUMode &&
109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fOptions==that.fOptions &&
110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *text==*that.text &&
111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        buffer==that.buffer &&
112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        bufferPos==that.bufferPos &&
113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        nextIndex==that.nextIndex);
114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------
117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Static utility methods
118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------
119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void U_EXPORT2
121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::normalize(const UnicodeString& source,
122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                      UNormalizationMode mode, int32_t options,
123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                      UnicodeString& result,
124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                      UErrorCode &status) {
125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(source.isBogus() || U_FAILURE(status)) {
126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        result.setToBogus();
127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(U_SUCCESS(status)) {
128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            status=U_ILLEGAL_ARGUMENT_ERROR;
129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeString localDest;
132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeString *dest;
133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(&source!=&result) {
135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            dest=&result;
136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // the source and result strings are the same object, use a temporary one
138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            dest=&localDest;
139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(U_SUCCESS(status)) {
142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(options&UNORM_UNICODE_3_2) {
143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)).
144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    normalize(source, *dest, status);
145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } else {
146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                n2->normalize(source, *dest, status);
147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(dest==&localDest && U_SUCCESS(status)) {
150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            result=*dest;
151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void U_EXPORT2
156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::compose(const UnicodeString& source,
157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    UBool compat, int32_t options,
158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    UnicodeString& result,
159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    UErrorCode &status) {
160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    normalize(source, compat ? UNORM_NFKC : UNORM_NFC, options, result, status);
161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void U_EXPORT2
164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::decompose(const UnicodeString& source,
165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                      UBool compat, int32_t options,
166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                      UnicodeString& result,
167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                      UErrorCode &status) {
168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    normalize(source, compat ? UNORM_NFKD : UNORM_NFD, options, result, status);
169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UNormalizationCheckResult
172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::quickCheck(const UnicodeString& source,
173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       UNormalizationMode mode, int32_t options,
174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                       UErrorCode &status) {
175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_SUCCESS(status)) {
177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(options&UNORM_UNICODE_3_2) {
178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)).
179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                quickCheck(source, status);
180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return n2->quickCheck(source, status);
182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return UNORM_MAYBE;
185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool
189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::isNormalized(const UnicodeString& source,
190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                         UNormalizationMode mode, int32_t options,
191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                         UErrorCode &status) {
192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(U_SUCCESS(status)) {
194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(options&UNORM_UNICODE_3_2) {
195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)).
196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                isNormalized(source, status);
197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            return n2->isNormalized(source, status);
199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return FALSE;
202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeString & U_EXPORT2
206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::concatenate(UnicodeString &left, UnicodeString &right,
207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        UnicodeString &result,
208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        UNormalizationMode mode, int32_t options,
209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                        UErrorCode &errorCode) {
210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(left.isBogus() || right.isBogus() || U_FAILURE(errorCode)) {
211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        result.setToBogus();
212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(U_SUCCESS(errorCode)) {
213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeString localDest;
217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UnicodeString *dest;
218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(&right!=&result) {
220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            dest=&result;
221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        } else {
222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            // the right and result strings are the same object, use a temporary one
223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            dest=&localDest;
224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        *dest=left;
226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, errorCode);
227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(U_SUCCESS(errorCode)) {
228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            if(options&UNORM_UNICODE_3_2) {
229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(errorCode)).
230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    append(*dest, right, errorCode);
231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            } else {
232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                n2->append(*dest, right, errorCode);
233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            }
234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(dest==&localDest && U_SUCCESS(errorCode)) {
236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            result=*dest;
237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return result;
240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------
243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Iteration API
244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------
245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Return the current character in the normalized text.
248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UChar32 Normalizer::current() {
250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(bufferPos<buffer.length() || nextNormalize()) {
251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return buffer.char32At(bufferPos);
252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return DONE;
254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Return the next character in the normalized text and advance
259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the iteration position by one.  If the end
260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * of the text has already been reached, {@link #DONE} is returned.
261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UChar32 Normalizer::next() {
263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(bufferPos<buffer.length() ||  nextNormalize()) {
264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 c=buffer.char32At(bufferPos);
265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        bufferPos+=UTF_CHAR_LENGTH(c);
266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return c;
267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return DONE;
269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Return the previous character in the normalized text and decrement
274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the iteration position by one.  If the beginning
275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * of the text has already been reached, {@link #DONE} is returned.
276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UChar32 Normalizer::previous() {
278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(bufferPos>0 || previousNormalize()) {
279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 c=buffer.char32At(bufferPos-1);
280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        bufferPos-=UTF_CHAR_LENGTH(c);
281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return c;
282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return DONE;
284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void Normalizer::reset() {
288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    currentIndex=nextIndex=text->setToStart();
289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    clearBuffer();
290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void
293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::setIndexOnly(int32_t index) {
294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    text->setIndex(index);  // pins index
295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    currentIndex=nextIndex=text->getIndex();
296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    clearBuffer();
297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Return the first character in the normalized text.  This resets
301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the <tt>Normalizer's</tt> position to the beginning of the text.
302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UChar32 Normalizer::first() {
304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    reset();
305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return next();
306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Return the last character in the normalized text.  This resets
310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the <tt>Normalizer's</tt> position to be just before the
311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the input text corresponding to that normalized character.
312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UChar32 Normalizer::last() {
314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    currentIndex=nextIndex=text->setToEnd();
315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    clearBuffer();
316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return previous();
317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Retrieve the current iteration position in the input text that is
321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * being normalized.  This method is useful in applications such as
322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * searching, where you need to be able to determine the position in
323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the input text that corresponds to a given normalized output character.
324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <p>
325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <b>Note:</b> This method sets the position in the <em>input</em>, while
326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * {@link #next} and {@link #previous} iterate through characters in the
327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <em>output</em>.  This means that there is not necessarily a one-to-one
328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * correspondence between characters returned by <tt>next</tt> and
329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <tt>previous</tt> and the indices passed to and returned from
330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <tt>setIndex</tt> and {@link #getIndex}.
331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *
332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t Normalizer::getIndex() const {
334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(bufferPos<buffer.length()) {
335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return currentIndex;
336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return nextIndex;
338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Retrieve the index of the start of the input text.  This is the begin index
343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt>
344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * over which this <tt>Normalizer</tt> is iterating
345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t Normalizer::startIndex() const {
347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return text->startIndex();
348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Retrieve the index of the end of the input text.  This is the end index
352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * over which this <tt>Normalizer</tt> is iterating
354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t Normalizer::endIndex() const {
356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return text->endIndex();
357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------
360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Property access methods
361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------
362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void
364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::setMode(UNormalizationMode newMode)
365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fUMode = newMode;
367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    init();
368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UNormalizationMode
371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::getUMode() const
372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return fUMode;
374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void
377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::setOption(int32_t option,
378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                      UBool value)
379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (value) {
381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fOptions |= option;
382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    } else {
383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        fOptions &= (~option);
384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    init();
386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool
389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::getOption(int32_t option) const
390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return (fOptions & option) != 0;
392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Set the input text over which this <tt>Normalizer</tt> will iterate.
396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The iteration position is set to the beginning of the input text.
397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void
399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::setText(const UnicodeString& newText,
400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    UErrorCode &status)
401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    CharacterIterator *newIter = new StringCharacterIterator(newText);
406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (newIter == NULL) {
407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        status = U_MEMORY_ALLOCATION_ERROR;
408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete text;
411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    text = newIter;
412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    reset();
413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Set the input text over which this <tt>Normalizer</tt> will iterate.
417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The iteration position is set to the beginning of the string.
418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void
420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::setText(const CharacterIterator& newText,
421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    UErrorCode &status)
422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    CharacterIterator *newIter = newText.clone();
427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (newIter == NULL) {
428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        status = U_MEMORY_ALLOCATION_ERROR;
429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete text;
432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    text = newIter;
433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    reset();
434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void
437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::setText(const UChar* newText,
438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    int32_t length,
439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)                    UErrorCode &status)
440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (U_FAILURE(status)) {
442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    CharacterIterator *newIter = new UCharCharacterIterator(newText, length);
445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if (newIter == NULL) {
446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        status = U_MEMORY_ALLOCATION_ERROR;
447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return;
448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    delete text;
450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    text = newIter;
451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    reset();
452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/**
455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Copies the text under iteration into the UnicodeString referred to by "result".
456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param result Receives a copy of the text under iteration.
457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */
458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void
459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::getText(UnicodeString&  result)
460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){
461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    text->getText(result);
462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------
465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Private utility methods
466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//-------------------------------------------------------------------------
467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void Normalizer::clearBuffer() {
469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    buffer.remove();
470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bufferPos=0;
471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool
474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::nextNormalize() {
475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    clearBuffer();
476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    currentIndex=nextIndex;
477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    text->setIndex(nextIndex);
478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(!text->hasNext()) {
479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return FALSE;
480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    // Skip at least one character so we make progress.
482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString segment(text->next32PostInc());
483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while(text->hasNext()) {
484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 c;
485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(fNorm2->hasBoundaryBefore(c=text->next32PostInc())) {
486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            text->move32(-1, CharacterIterator::kCurrent);
487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        segment.append(c);
490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    nextIndex=text->getIndex();
492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode errorCode=U_ZERO_ERROR;
493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fNorm2->normalize(segment, buffer, errorCode);
494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return U_SUCCESS(errorCode) && !buffer.isEmpty();
495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool
498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::previousNormalize() {
499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    clearBuffer();
500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    nextIndex=currentIndex;
501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    text->setIndex(currentIndex);
502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    if(!text->hasPrevious()) {
503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        return FALSE;
504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UnicodeString segment;
506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    while(text->hasPrevious()) {
507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        UChar32 c=text->previous32();
508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        segment.insert(0, c);
509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        if(fNorm2->hasBoundaryBefore(c)) {
510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)            break;
511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)        }
512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    }
513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    currentIndex=text->getIndex();
514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    UErrorCode errorCode=U_ZERO_ERROR;
515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    fNorm2->normalize(segment, buffer, errorCode);
516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    bufferPos=buffer.length();
517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)    return U_SUCCESS(errorCode) && !buffer.isEmpty();
518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)}
519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_END
521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)
522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif /* #if !UCONFIG_NO_NORMALIZATION */
523