16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*
26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *************************************************************************
36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * COPYRIGHT:
46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Copyright (c) 1996-2012, International Business Machines Corporation and
56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * others. All Rights Reserved.
66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *************************************************************************
76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h"
106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_NORMALIZATION
126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uniset.h"
146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/unistr.h"
156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/chariter.h"
166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/schriter.h"
176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uchriter.h"
186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/normlzr.h"
196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf16.h"
206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cmemory.h"
216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "normalizer2impl.h"
226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uprops.h"  // for uniset_getUnicode32Instance()
236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_BEGIN
256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer)
276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------
296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Constructors and other boilerplate
306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------
316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode) :
336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    text(new StringCharacterIterator(str)),
356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    currentIndex(0), nextIndex(0),
366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    buffer(), bufferPos(0)
376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    init();
396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::Normalizer(const UChar *str, int32_t length, UNormalizationMode mode) :
426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    text(new UCharCharacterIterator(str, length)),
446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    currentIndex(0), nextIndex(0),
456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    buffer(), bufferPos(0)
466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    init();
486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode) :
516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0),
526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    text(iter.clone()),
536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    currentIndex(0), nextIndex(0),
546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    buffer(), bufferPos(0)
556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    init();
576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::Normalizer(const Normalizer &copy) :
606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UObject(copy), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(copy.fUMode), fOptions(copy.fOptions),
616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    text(copy.text->clone()),
626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    currentIndex(copy.currentIndex), nextIndex(copy.nextIndex),
636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    buffer(copy.buffer), bufferPos(copy.bufferPos)
646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    init();
666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid
696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::init() {
706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode errorCode=U_ZERO_ERROR;
716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fNorm2=Normalizer2Factory::getInstance(fUMode, errorCode);
726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(fOptions&UNORM_UNICODE_3_2) {
736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        delete fFilteredNorm2;
746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fNorm2=fFilteredNorm2=
756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            new FilteredNormalizer2(*fNorm2, *uniset_getUnicode32Instance(errorCode));
766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(U_FAILURE(errorCode)) {
786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        errorCode=U_ZERO_ERROR;
796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fNorm2=Normalizer2Factory::getNoopInstance(errorCode);
806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::~Normalizer()
846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete fFilteredNorm2;
866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete text;
876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer*
906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::clone() const
916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return new Normalizer(*this);
936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Generates a hash code for this iterator.
976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t Normalizer::hashCode() const
996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return text->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex;
1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool Normalizer::operator==(const Normalizer& that) const
1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return
1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        this==&that ||
1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        (fUMode==that.fUMode &&
1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fOptions==that.fOptions &&
1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        *text==*that.text &&
1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        buffer==that.buffer &&
1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        bufferPos==that.bufferPos &&
1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        nextIndex==that.nextIndex);
1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------
1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Static utility methods
1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------
1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid U_EXPORT2
1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::normalize(const UnicodeString& source,
1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                      UNormalizationMode mode, int32_t options,
1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                      UnicodeString& result,
1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                      UErrorCode &status) {
1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(source.isBogus() || U_FAILURE(status)) {
1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result.setToBogus();
1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(U_SUCCESS(status)) {
1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            status=U_ILLEGAL_ARGUMENT_ERROR;
1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString localDest;
1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString *dest;
1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(&source!=&result) {
1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            dest=&result;
1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // the source and result strings are the same object, use a temporary one
1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            dest=&localDest;
1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(U_SUCCESS(status)) {
1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(options&UNORM_UNICODE_3_2) {
1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)).
1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    normalize(source, *dest, status);
1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                n2->normalize(source, *dest, status);
1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(dest==&localDest && U_SUCCESS(status)) {
1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            result=*dest;
1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid U_EXPORT2
1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::compose(const UnicodeString& source,
1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UBool compat, int32_t options,
1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UnicodeString& result,
1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UErrorCode &status) {
1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    normalize(source, compat ? UNORM_NFKC : UNORM_NFC, options, result, status);
1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid U_EXPORT2
1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::decompose(const UnicodeString& source,
1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                      UBool compat, int32_t options,
1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                      UnicodeString& result,
1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                      UErrorCode &status) {
1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    normalize(source, compat ? UNORM_NFKD : UNORM_NFD, options, result, status);
1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUNormalizationCheckResult
1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::quickCheck(const UnicodeString& source,
1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                       UNormalizationMode mode, int32_t options,
1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                       UErrorCode &status) {
1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(U_SUCCESS(status)) {
1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(options&UNORM_UNICODE_3_2) {
1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)).
1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                quickCheck(source, status);
1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return n2->quickCheck(source, status);
1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return UNORM_MAYBE;
1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool
1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::isNormalized(const UnicodeString& source,
1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                         UNormalizationMode mode, int32_t options,
1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                         UErrorCode &status) {
1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status);
1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(U_SUCCESS(status)) {
1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(options&UNORM_UNICODE_3_2) {
1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)).
1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                isNormalized(source, status);
1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return n2->isNormalized(source, status);
1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString & U_EXPORT2
2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::concatenate(const UnicodeString &left, const UnicodeString &right,
2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        UnicodeString &result,
2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        UNormalizationMode mode, int32_t options,
2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        UErrorCode &errorCode) {
2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(left.isBogus() || right.isBogus() || U_FAILURE(errorCode)) {
2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        result.setToBogus();
2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(U_SUCCESS(errorCode)) {
2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString localDest;
2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UnicodeString *dest;
2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(&right!=&result) {
2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            dest=&result;
2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            // the right and result strings are the same object, use a temporary one
2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            dest=&localDest;
2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        *dest=left;
2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, errorCode);
2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(U_SUCCESS(errorCode)) {
2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(options&UNORM_UNICODE_3_2) {
2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(errorCode)).
2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    append(*dest, right, errorCode);
2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            } else {
2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                n2->append(*dest, right, errorCode);
2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(dest==&localDest && U_SUCCESS(errorCode)) {
2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            result=*dest;
2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return result;
2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------
2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Iteration API
2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------
2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Return the current character in the normalized text.
2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUChar32 Normalizer::current() {
2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(bufferPos<buffer.length() || nextNormalize()) {
2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return buffer.char32At(bufferPos);
2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return DONE;
2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Return the next character in the normalized text and advance
2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the iteration position by one.  If the end
2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * of the text has already been reached, {@link #DONE} is returned.
2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUChar32 Normalizer::next() {
2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(bufferPos<buffer.length() ||  nextNormalize()) {
2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar32 c=buffer.char32At(bufferPos);
2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        bufferPos+=U16_LENGTH(c);
2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return c;
2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return DONE;
2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Return the previous character in the normalized text and decrement
2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the iteration position by one.  If the beginning
2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * of the text has already been reached, {@link #DONE} is returned.
2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUChar32 Normalizer::previous() {
2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(bufferPos>0 || previousNormalize()) {
2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar32 c=buffer.char32At(bufferPos-1);
2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        bufferPos-=U16_LENGTH(c);
2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return c;
2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return DONE;
2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid Normalizer::reset() {
2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    currentIndex=nextIndex=text->setToStart();
2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    clearBuffer();
2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid
2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::setIndexOnly(int32_t index) {
2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    text->setIndex(index);  // pins index
2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    currentIndex=nextIndex=text->getIndex();
2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    clearBuffer();
2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Return the first character in the normalized text.  This resets
3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the <tt>Normalizer's</tt> position to the beginning of the text.
3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUChar32 Normalizer::first() {
3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    reset();
3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return next();
3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Return the last character in the normalized text.  This resets
3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the <tt>Normalizer's</tt> position to be just before the
3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the input text corresponding to that normalized character.
3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUChar32 Normalizer::last() {
3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    currentIndex=nextIndex=text->setToEnd();
3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    clearBuffer();
3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return previous();
3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Retrieve the current iteration position in the input text that is
3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * being normalized.  This method is useful in applications such as
3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * searching, where you need to be able to determine the position in
3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the input text that corresponds to a given normalized output character.
3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <p>
3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <b>Note:</b> This method sets the position in the <em>input</em>, while
3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * {@link #next} and {@link #previous} iterate through characters in the
3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <em>output</em>.  This means that there is not necessarily a one-to-one
3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * correspondence between characters returned by <tt>next</tt> and
3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <tt>previous</tt> and the indices passed to and returned from
3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <tt>setIndex</tt> and {@link #getIndex}.
3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t Normalizer::getIndex() const {
3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(bufferPos<buffer.length()) {
3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return currentIndex;
3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return nextIndex;
3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
3416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Retrieve the index of the start of the input text.  This is the begin index
3426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt>
3436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * over which this <tt>Normalizer</tt> is iterating
3446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
3456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t Normalizer::startIndex() const {
3466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return text->startIndex();
3476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
3506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Retrieve the index of the end of the input text.  This is the end index
3516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
3526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * over which this <tt>Normalizer</tt> is iterating
3536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
3546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t Normalizer::endIndex() const {
3556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return text->endIndex();
3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------
3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Property access methods
3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------
3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid
3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::setMode(UNormalizationMode newMode)
3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fUMode = newMode;
3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    init();
3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUNormalizationMode
3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::getUMode() const
3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return fUMode;
3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid
3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::setOption(int32_t option,
3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                      UBool value)
3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (value) {
3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fOptions |= option;
3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    } else {
3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fOptions &= (~option);
3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    init();
3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool
3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::getOption(int32_t option) const
3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return (fOptions & option) != 0;
3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Set the input text over which this <tt>Normalizer</tt> will iterate.
3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The iteration position is set to the beginning of the input text.
3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid
3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::setText(const UnicodeString& newText,
3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UErrorCode &status)
4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    CharacterIterator *newIter = new StringCharacterIterator(newText);
4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (newIter == NULL) {
4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_MEMORY_ALLOCATION_ERROR;
4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete text;
4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    text = newIter;
4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    reset();
4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
4156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Set the input text over which this <tt>Normalizer</tt> will iterate.
4166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The iteration position is set to the beginning of the string.
4176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
4186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid
4196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::setText(const CharacterIterator& newText,
4206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UErrorCode &status)
4216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
4226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
4236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
4246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    CharacterIterator *newIter = newText.clone();
4266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (newIter == NULL) {
4276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_MEMORY_ALLOCATION_ERROR;
4286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
4296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete text;
4316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    text = newIter;
4326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    reset();
4336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
4346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid
4366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::setText(const UChar* newText,
4376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    int32_t length,
4386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    UErrorCode &status)
4396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
4406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (U_FAILURE(status)) {
4416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
4426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    CharacterIterator *newIter = new UCharCharacterIterator(newText, length);
4446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (newIter == NULL) {
4456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        status = U_MEMORY_ALLOCATION_ERROR;
4466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
4476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    delete text;
4496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    text = newIter;
4506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    reset();
4516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
4526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/**
4546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Copies the text under iteration into the UnicodeString referred to by "result".
4556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param result Receives a copy of the text under iteration.
4566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
4576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid
4586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::getText(UnicodeString&  result)
4596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{
4606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    text->getText(result);
4616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
4626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------
4646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Private utility methods
4656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//-------------------------------------------------------------------------
4666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid Normalizer::clearBuffer() {
4686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    buffer.remove();
4696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    bufferPos=0;
4706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
4716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool
4736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::nextNormalize() {
4746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    clearBuffer();
4756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    currentIndex=nextIndex;
4766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    text->setIndex(nextIndex);
4776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(!text->hasNext()) {
4786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
4796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    // Skip at least one character so we make progress.
4816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString segment(text->next32PostInc());
4826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while(text->hasNext()) {
4836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar32 c;
4846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(fNorm2->hasBoundaryBefore(c=text->next32PostInc())) {
4856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            text->move32(-1, CharacterIterator::kCurrent);
4866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
4876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
4886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        segment.append(c);
4896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    nextIndex=text->getIndex();
4916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode errorCode=U_ZERO_ERROR;
4926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fNorm2->normalize(segment, buffer, errorCode);
4936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return U_SUCCESS(errorCode) && !buffer.isEmpty();
4946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
4956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool
4976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::previousNormalize() {
4986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    clearBuffer();
4996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    nextIndex=currentIndex;
5006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    text->setIndex(currentIndex);
5016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(!text->hasPrevious()) {
5026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return FALSE;
5036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UnicodeString segment;
5056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while(text->hasPrevious()) {
5066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        UChar32 c=text->previous32();
5076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        segment.insert(0, c);
5086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(fNorm2->hasBoundaryBefore(c)) {
5096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            break;
5106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
5116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    currentIndex=text->getIndex();
5136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode errorCode=U_ZERO_ERROR;
5146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fNorm2->normalize(segment, buffer, errorCode);
5156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    bufferPos=buffer.length();
5166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return U_SUCCESS(errorCode) && !buffer.isEmpty();
5176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
5186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_END
5206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif /* #if !UCONFIG_NO_NORMALIZATION */
522