1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *************************************************************************
3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * COPYRIGHT:
4b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Copyright (c) 1996-2011, International Business Machines Corporation and
5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * others. All Rights Reserved.
6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *************************************************************************
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_NORMALIZATION
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/uniset.h"
14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/unistr.h"
15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/chariter.h"
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/schriter.h"
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchriter.h"
18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/normlzr.h"
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h"
2050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "normalizer2impl.h"
2150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "uprops.h"  // for uniset_getUnicode32Instance()
22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN
24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer)
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Constructors and other boilerplate
29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------
30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode) :
3250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
3350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    text(new StringCharacterIterator(str)),
34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    currentIndex(0), nextIndex(0),
35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    buffer(), bufferPos(0)
36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
3750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    init();
38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::Normalizer(const UChar *str, int32_t length, UNormalizationMode mode) :
4150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
4250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    text(new UCharCharacterIterator(str, length)),
43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    currentIndex(0), nextIndex(0),
44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    buffer(), bufferPos(0)
45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
4650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    init();
47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode) :
5050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
5150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    text(iter.clone()),
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    currentIndex(0), nextIndex(0),
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    buffer(), bufferPos(0)
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
5550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    init();
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::Normalizer(const Normalizer &copy) :
5950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UObject(copy), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(copy.fUMode), fOptions(copy.fOptions),
6050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    text(copy.text->clone()),
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    currentIndex(copy.currentIndex), nextIndex(copy.nextIndex),
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    buffer(copy.buffer), bufferPos(copy.bufferPos)
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
6450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    init();
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar _NUL=0;
68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
7050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer::init() {
71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode errorCode=U_ZERO_ERROR;
7250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fNorm2=Normalizer2Factory::getInstance(fUMode, errorCode);
7350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(fOptions&UNORM_UNICODE_3_2) {
7450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        delete fFilteredNorm2;
7550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fNorm2=fFilteredNorm2=
7650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            new FilteredNormalizer2(*fNorm2, *uniset_getUnicode32Instance(errorCode));
7750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
7850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_FAILURE(errorCode)) {
7950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        errorCode=U_ZERO_ERROR;
8050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fNorm2=Normalizer2Factory::getNoopInstance(errorCode);
81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::~Normalizer()
85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
8650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete fFilteredNorm2;
8750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete text;
88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer*
91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::clone() const
92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
9350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return new Normalizer(*this);
94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Generates a hash code for this iterator.
98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t Normalizer::hashCode() const
100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
10150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return text->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex;
102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool Normalizer::operator==(const Normalizer& that) const
105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return
107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        this==&that ||
10827f654740f2a26ad62a5c155af9199af9e69b889claireho        (fUMode==that.fUMode &&
109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fOptions==that.fOptions &&
11050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        *text==*that.text &&
111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        buffer==that.buffer &&
112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        bufferPos==that.bufferPos &&
11327f654740f2a26ad62a5c155af9199af9e69b889claireho        nextIndex==that.nextIndex);
114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------
117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Static utility methods
118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------
119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid U_EXPORT2
121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::normalize(const UnicodeString& source,
122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      UNormalizationMode mode, int32_t options,
123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      UnicodeString& result,
124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      UErrorCode &status) {
125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(source.isBogus() || U_FAILURE(status)) {
126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result.setToBogus();
127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(U_SUCCESS(status)) {
128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            status=U_ILLEGAL_ARGUMENT_ERROR;
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString localDest;
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString *dest;
133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(&source!=&result) {
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            dest=&result;
136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // the source and result strings are the same object, use a temporary one
138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            dest=&localDest;
139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
14050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
14150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(U_SUCCESS(status)) {
14250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(options&UNORM_UNICODE_3_2) {
14350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)).
14450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    normalize(source, *dest, status);
14550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
14650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                n2->normalize(source, *dest, status);
14750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
14950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(dest==&localDest && U_SUCCESS(status)) {
150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            result=*dest;
151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid U_EXPORT2
156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::compose(const UnicodeString& source,
157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UBool compat, int32_t options,
158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UnicodeString& result,
159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UErrorCode &status) {
16050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    normalize(source, compat ? UNORM_NFKC : UNORM_NFC, options, result, status);
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid U_EXPORT2
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::decompose(const UnicodeString& source,
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      UBool compat, int32_t options,
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      UnicodeString& result,
167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      UErrorCode &status) {
16850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    normalize(source, compat ? UNORM_NFKD : UNORM_NFD, options, result, status);
16950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
17050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho
17150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUNormalizationCheckResult
17250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer::quickCheck(const UnicodeString& source,
17350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                       UNormalizationMode mode, int32_t options,
17450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                       UErrorCode &status) {
17550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
17650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_SUCCESS(status)) {
17750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(options&UNORM_UNICODE_3_2) {
17850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)).
17950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                quickCheck(source, status);
18050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        } else {
18150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return n2->quickCheck(source, status);
182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
18450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return UNORM_MAYBE;
18550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
18650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}
187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
18850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool
18950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer::isNormalized(const UnicodeString& source,
19050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         UNormalizationMode mode, int32_t options,
19150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                         UErrorCode &status) {
19250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
19350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(U_SUCCESS(status)) {
19450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(options&UNORM_UNICODE_3_2) {
19550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)).
19650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                isNormalized(source, status);
197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
19850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            return n2->isNormalized(source, status);
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
20050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    } else {
20150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        return FALSE;
202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString & U_EXPORT2
206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoNormalizer::concatenate(const UnicodeString &left, const UnicodeString &right,
207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        UnicodeString &result,
208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        UNormalizationMode mode, int32_t options,
209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        UErrorCode &errorCode) {
210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(left.isBogus() || right.isBogus() || U_FAILURE(errorCode)) {
211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result.setToBogus();
212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if(U_SUCCESS(errorCode)) {
213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString localDest;
217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString *dest;
218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
21950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(&right!=&result) {
220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            dest=&result;
221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
22250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            // the right and result strings are the same object, use a temporary one
223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            dest=&localDest;
224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
22550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        *dest=left;
22650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, errorCode);
22750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(U_SUCCESS(errorCode)) {
22850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            if(options&UNORM_UNICODE_3_2) {
22950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(errorCode)).
23050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                    append(*dest, right, errorCode);
23150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            } else {
23250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho                n2->append(*dest, right, errorCode);
23350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            }
234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
23550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(dest==&localDest && U_SUCCESS(errorCode)) {
236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            result=*dest;
237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------
243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Iteration API
244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------
245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Return the current character in the normalized text.
248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUChar32 Normalizer::current() {
250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(bufferPos<buffer.length() || nextNormalize()) {
251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return buffer.char32At(bufferPos);
252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return DONE;
254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Return the next character in the normalized text and advance
259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the iteration position by one.  If the end
260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the text has already been reached, {@link #DONE} is returned.
261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUChar32 Normalizer::next() {
263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(bufferPos<buffer.length() ||  nextNormalize()) {
264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c=buffer.char32At(bufferPos);
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        bufferPos+=UTF_CHAR_LENGTH(c);
266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return c;
267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return DONE;
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Return the previous character in the normalized text and decrement
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the iteration position by one.  If the beginning
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the text has already been reached, {@link #DONE} is returned.
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUChar32 Normalizer::previous() {
278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(bufferPos>0 || previousNormalize()) {
279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UChar32 c=buffer.char32At(bufferPos-1);
280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        bufferPos-=UTF_CHAR_LENGTH(c);
281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return c;
282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return DONE;
284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Normalizer::reset() {
28850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    currentIndex=nextIndex=text->setToStart();
289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    clearBuffer();
290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::setIndexOnly(int32_t index) {
29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    text->setIndex(index);  // pins index
29550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    currentIndex=nextIndex=text->getIndex();
296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    clearBuffer();
297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
30050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Return the first character in the normalized text.  This resets
30150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * the <tt>Normalizer's</tt> position to the beginning of the text.
302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUChar32 Normalizer::first() {
304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    reset();
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return next();
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
30950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Return the last character in the normalized text.  This resets
310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the <tt>Normalizer's</tt> position to be just before the
311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the input text corresponding to that normalized character.
312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUChar32 Normalizer::last() {
31450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    currentIndex=nextIndex=text->setToEnd();
315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    clearBuffer();
316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return previous();
317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Retrieve the current iteration position in the input text that is
321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * being normalized.  This method is useful in applications such as
322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * searching, where you need to be able to determine the position in
323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the input text that corresponds to a given normalized output character.
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>
325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <b>Note:</b> This method sets the position in the <em>input</em>, while
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * {@link #next} and {@link #previous} iterate through characters in the
327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <em>output</em>.  This means that there is not necessarily a one-to-one
328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * correspondence between characters returned by <tt>next</tt> and
329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <tt>previous</tt> and the indices passed to and returned from
330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <tt>setIndex</tt> and {@link #getIndex}.
331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t Normalizer::getIndex() const {
334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(bufferPos<buffer.length()) {
335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return currentIndex;
336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return nextIndex;
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
34250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Retrieve the index of the start of the input text.  This is the begin index
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt>
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * over which this <tt>Normalizer</tt> is iterating
345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t Normalizer::startIndex() const {
34750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return text->startIndex();
348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
35150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Retrieve the index of the end of the input text.  This is the end index
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * over which this <tt>Normalizer</tt> is iterating
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t Normalizer::endIndex() const {
35650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    return text->endIndex();
357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------
360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Property access methods
361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------
362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::setMode(UNormalizationMode newMode)
365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fUMode = newMode;
36750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    init();
368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUNormalizationMode
371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::getUMode() const
372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return fUMode;
374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::setOption(int32_t option,
378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      UBool value)
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (value) {
381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fOptions |= option;
382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fOptions &= (~option);
384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
38550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    init();
386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::getOption(int32_t option) const
390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return (fOptions & option) != 0;
392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Set the input text over which this <tt>Normalizer</tt> will iterate.
39650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The iteration position is set to the beginning of the input text.
397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::setText(const UnicodeString& newText,
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UErrorCode &status)
401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    CharacterIterator *newIter = new StringCharacterIterator(newText);
406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (newIter == NULL) {
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_MEMORY_ALLOCATION_ERROR;
408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
41050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete text;
41150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    text = newIter;
412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    reset();
413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Set the input text over which this <tt>Normalizer</tt> will iterate.
417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The iteration position is set to the beginning of the string.
418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::setText(const CharacterIterator& newText,
421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UErrorCode &status)
422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    CharacterIterator *newIter = newText.clone();
427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (newIter == NULL) {
428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_MEMORY_ALLOCATION_ERROR;
429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
43150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete text;
43250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    text = newIter;
433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    reset();
434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::setText(const UChar* newText,
438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    int32_t length,
439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UErrorCode &status)
440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    CharacterIterator *newIter = new UCharCharacterIterator(newText, length);
445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (newIter == NULL) {
446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        status = U_MEMORY_ALLOCATION_ERROR;
447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
44950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    delete text;
45050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    text = newIter;
451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    reset();
452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Copies the text under iteration into the UnicodeString referred to by "result".
456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param result Receives a copy of the text under iteration.
457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid
459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::getText(UnicodeString&  result)
460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
46150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    text->getText(result);
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------
465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Private utility methods
466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//-------------------------------------------------------------------------
467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Normalizer::clearBuffer() {
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    buffer.remove();
470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bufferPos=0;
471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool
474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::nextNormalize() {
475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    clearBuffer();
476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    currentIndex=nextIndex;
47750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    text->setIndex(nextIndex);
47850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(!text->hasNext()) {
479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
48150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    // Skip at least one character so we make progress.
48250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString segment(text->next32PostInc());
48350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while(text->hasNext()) {
48450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 c;
48550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(fNorm2->hasBoundaryBefore(c=text->next32PostInc())) {
48650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            text->move32(-1, CharacterIterator::kCurrent);
48750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
48850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
48950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        segment.append(c);
490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
49150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    nextIndex=text->getIndex();
49250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode errorCode=U_ZERO_ERROR;
49350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fNorm2->normalize(segment, buffer, errorCode);
494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return U_SUCCESS(errorCode) && !buffer.isEmpty();
495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool
498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::previousNormalize() {
499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    clearBuffer();
500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    nextIndex=currentIndex;
50150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    text->setIndex(currentIndex);
50250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if(!text->hasPrevious()) {
503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return FALSE;
504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
50550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UnicodeString segment;
50650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    while(text->hasPrevious()) {
50750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        UChar32 c=text->previous32();
50850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        segment.insert(0, c);
50950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        if(fNorm2->hasBoundaryBefore(c)) {
51050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            break;
51150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        }
512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
51350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    currentIndex=text->getIndex();
51450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    UErrorCode errorCode=U_ZERO_ERROR;
51550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fNorm2->normalize(segment, buffer, errorCode);
516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bufferPos=buffer.length();
517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return U_SUCCESS(errorCode) && !buffer.isEmpty();
518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END
521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_NORMALIZATION */
523