16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ************************************************************************* 36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * COPYRIGHT: 46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Copyright (c) 1996-2012, International Business Machines Corporation and 56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * others. All Rights Reserved. 66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ************************************************************************* 76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h" 106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_NORMALIZATION 126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uniset.h" 146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/unistr.h" 156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/chariter.h" 166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/schriter.h" 176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uchriter.h" 186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/normlzr.h" 196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf16.h" 206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cmemory.h" 216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "normalizer2impl.h" 226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uprops.h" // for uniset_getUnicode32Instance() 236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_BEGIN 256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer) 276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------- 296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Constructors and other boilerplate 306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------- 316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode) : 336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0), 346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org text(new StringCharacterIterator(str)), 356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org currentIndex(0), nextIndex(0), 366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buffer(), bufferPos(0) 376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org init(); 396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::Normalizer(const UChar *str, int32_t length, UNormalizationMode mode) : 426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0), 436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org text(new UCharCharacterIterator(str, length)), 446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org currentIndex(0), nextIndex(0), 456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buffer(), bufferPos(0) 466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org init(); 486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode) : 516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0), 526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org text(iter.clone()), 536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org currentIndex(0), nextIndex(0), 546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buffer(), bufferPos(0) 556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org init(); 576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::Normalizer(const Normalizer ©) : 606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UObject(copy), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(copy.fUMode), fOptions(copy.fOptions), 616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org text(copy.text->clone()), 626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org currentIndex(copy.currentIndex), nextIndex(copy.nextIndex), 636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buffer(copy.buffer), bufferPos(copy.bufferPos) 646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org init(); 666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid 696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::init() { 706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode errorCode=U_ZERO_ERROR; 716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fNorm2=Normalizer2Factory::getInstance(fUMode, errorCode); 726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(fOptions&UNORM_UNICODE_3_2) { 736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete fFilteredNorm2; 746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fNorm2=fFilteredNorm2= 756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org new FilteredNormalizer2(*fNorm2, *uniset_getUnicode32Instance(errorCode)); 766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(errorCode)) { 786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errorCode=U_ZERO_ERROR; 796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fNorm2=Normalizer2Factory::getNoopInstance(errorCode); 806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::~Normalizer() 846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete fFilteredNorm2; 866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete text; 876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer* 906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::clone() const 916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return new Normalizer(*this); 936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Generates a hash code for this iterator. 976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t Normalizer::hashCode() const 996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return text->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex; 1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool Normalizer::operator==(const Normalizer& that) const 1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org this==&that || 1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (fUMode==that.fUMode && 1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fOptions==that.fOptions && 1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *text==*that.text && 1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buffer==that.buffer && 1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bufferPos==that.bufferPos && 1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nextIndex==that.nextIndex); 1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------- 1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Static utility methods 1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------- 1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid U_EXPORT2 1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::normalize(const UnicodeString& source, 1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UNormalizationMode mode, int32_t options, 1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString& result, 1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &status) { 1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(source.isBogus() || U_FAILURE(status)) { 1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result.setToBogus(); 1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_SUCCESS(status)) { 1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status=U_ILLEGAL_ARGUMENT_ERROR; 1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString localDest; 1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString *dest; 1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(&source!=&result) { 1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest=&result; 1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the source and result strings are the same object, use a temporary one 1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest=&localDest; 1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status); 1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_SUCCESS(status)) { 1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(options&UNORM_UNICODE_3_2) { 1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)). 1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org normalize(source, *dest, status); 1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n2->normalize(source, *dest, status); 1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(dest==&localDest && U_SUCCESS(status)) { 1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result=*dest; 1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid U_EXPORT2 1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::compose(const UnicodeString& source, 1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool compat, int32_t options, 1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString& result, 1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &status) { 1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org normalize(source, compat ? UNORM_NFKC : UNORM_NFC, options, result, status); 1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid U_EXPORT2 1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::decompose(const UnicodeString& source, 1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool compat, int32_t options, 1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString& result, 1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &status) { 1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org normalize(source, compat ? UNORM_NFKD : UNORM_NFD, options, result, status); 1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUNormalizationCheckResult 1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::quickCheck(const UnicodeString& source, 1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UNormalizationMode mode, int32_t options, 1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &status) { 1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status); 1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_SUCCESS(status)) { 1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(options&UNORM_UNICODE_3_2) { 1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)). 1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org quickCheck(source, status); 1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return n2->quickCheck(source, status); 1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return UNORM_MAYBE; 1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool 1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::isNormalized(const UnicodeString& source, 1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UNormalizationMode mode, int32_t options, 1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &status) { 1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status); 1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_SUCCESS(status)) { 1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(options&UNORM_UNICODE_3_2) { 1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)). 1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isNormalized(source, status); 1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return n2->isNormalized(source, status); 1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUnicodeString & U_EXPORT2 2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::concatenate(const UnicodeString &left, const UnicodeString &right, 2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString &result, 2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UNormalizationMode mode, int32_t options, 2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &errorCode) { 2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(left.isBogus() || right.isBogus() || U_FAILURE(errorCode)) { 2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result.setToBogus(); 2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_SUCCESS(errorCode)) { 2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errorCode=U_ILLEGAL_ARGUMENT_ERROR; 2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString localDest; 2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString *dest; 2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(&right!=&result) { 2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest=&result; 2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the right and result strings are the same object, use a temporary one 2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest=&localDest; 2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *dest=left; 2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, errorCode); 2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_SUCCESS(errorCode)) { 2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(options&UNORM_UNICODE_3_2) { 2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(errorCode)). 2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org append(*dest, right, errorCode); 2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n2->append(*dest, right, errorCode); 2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(dest==&localDest && U_SUCCESS(errorCode)) { 2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result=*dest; 2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return result; 2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------- 2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Iteration API 2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------- 2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Return the current character in the normalized text. 2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUChar32 Normalizer::current() { 2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(bufferPos<buffer.length() || nextNormalize()) { 2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return buffer.char32At(bufferPos); 2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return DONE; 2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Return the next character in the normalized text and advance 2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the iteration position by one. If the end 2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * of the text has already been reached, {@link #DONE} is returned. 2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUChar32 Normalizer::next() { 2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(bufferPos<buffer.length() || nextNormalize()) { 2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c=buffer.char32At(bufferPos); 2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bufferPos+=U16_LENGTH(c); 2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return c; 2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return DONE; 2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Return the previous character in the normalized text and decrement 2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the iteration position by one. If the beginning 2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * of the text has already been reached, {@link #DONE} is returned. 2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUChar32 Normalizer::previous() { 2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(bufferPos>0 || previousNormalize()) { 2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c=buffer.char32At(bufferPos-1); 2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bufferPos-=U16_LENGTH(c); 2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return c; 2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return DONE; 2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid Normalizer::reset() { 2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org currentIndex=nextIndex=text->setToStart(); 2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org clearBuffer(); 2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid 2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::setIndexOnly(int32_t index) { 2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org text->setIndex(index); // pins index 2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org currentIndex=nextIndex=text->getIndex(); 2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org clearBuffer(); 2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Return the first character in the normalized text. This resets 3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the <tt>Normalizer's</tt> position to the beginning of the text. 3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUChar32 Normalizer::first() { 3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reset(); 3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return next(); 3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Return the last character in the normalized text. This resets 3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the <tt>Normalizer's</tt> position to be just before the 3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the input text corresponding to that normalized character. 3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUChar32 Normalizer::last() { 3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org currentIndex=nextIndex=text->setToEnd(); 3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org clearBuffer(); 3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return previous(); 3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Retrieve the current iteration position in the input text that is 3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * being normalized. This method is useful in applications such as 3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * searching, where you need to be able to determine the position in 3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the input text that corresponds to a given normalized output character. 3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <p> 3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <b>Note:</b> This method sets the position in the <em>input</em>, while 3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * {@link #next} and {@link #previous} iterate through characters in the 3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <em>output</em>. This means that there is not necessarily a one-to-one 3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * correspondence between characters returned by <tt>next</tt> and 3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <tt>previous</tt> and the indices passed to and returned from 3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <tt>setIndex</tt> and {@link #getIndex}. 3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t Normalizer::getIndex() const { 3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(bufferPos<buffer.length()) { 3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return currentIndex; 3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return nextIndex; 3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 3416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Retrieve the index of the start of the input text. This is the begin index 3426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt> 3436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * over which this <tt>Normalizer</tt> is iterating 3446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 3456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t Normalizer::startIndex() const { 3466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return text->startIndex(); 3476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 3506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Retrieve the index of the end of the input text. This is the end index 3516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt> 3526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * over which this <tt>Normalizer</tt> is iterating 3536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 3546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t Normalizer::endIndex() const { 3556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return text->endIndex(); 3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------- 3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Property access methods 3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------- 3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid 3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::setMode(UNormalizationMode newMode) 3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fUMode = newMode; 3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org init(); 3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUNormalizationMode 3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::getUMode() const 3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return fUMode; 3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid 3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::setOption(int32_t option, 3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool value) 3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (value) { 3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fOptions |= option; 3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fOptions &= (~option); 3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org init(); 3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool 3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::getOption(int32_t option) const 3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (fOptions & option) != 0; 3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Set the input text over which this <tt>Normalizer</tt> will iterate. 3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The iteration position is set to the beginning of the input text. 3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid 3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::setText(const UnicodeString& newText, 3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &status) 4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org CharacterIterator *newIter = new StringCharacterIterator(newText); 4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (newIter == NULL) { 4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_MEMORY_ALLOCATION_ERROR; 4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete text; 4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org text = newIter; 4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reset(); 4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 4156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Set the input text over which this <tt>Normalizer</tt> will iterate. 4166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The iteration position is set to the beginning of the string. 4176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 4186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid 4196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::setText(const CharacterIterator& newText, 4206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &status) 4216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 4226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 4236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 4246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org CharacterIterator *newIter = newText.clone(); 4266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (newIter == NULL) { 4276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_MEMORY_ALLOCATION_ERROR; 4286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 4296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete text; 4316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org text = newIter; 4326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reset(); 4336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid 4366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::setText(const UChar* newText, 4376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t length, 4386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode &status) 4396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 4406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (U_FAILURE(status)) { 4416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 4426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org CharacterIterator *newIter = new UCharCharacterIterator(newText, length); 4446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (newIter == NULL) { 4456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org status = U_MEMORY_ALLOCATION_ERROR; 4466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 4476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete text; 4496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org text = newIter; 4506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reset(); 4516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 4546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Copies the text under iteration into the UnicodeString referred to by "result". 4556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param result Receives a copy of the text under iteration. 4566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 4576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid 4586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::getText(UnicodeString& result) 4596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org{ 4606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org text->getText(result); 4616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------- 4646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org// Private utility methods 4656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org//------------------------------------------------------------------------- 4666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid Normalizer::clearBuffer() { 4686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buffer.remove(); 4696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bufferPos=0; 4706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool 4736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::nextNormalize() { 4746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org clearBuffer(); 4756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org currentIndex=nextIndex; 4766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org text->setIndex(nextIndex); 4776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!text->hasNext()) { 4786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 4796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Skip at least one character so we make progress. 4816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString segment(text->next32PostInc()); 4826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(text->hasNext()) { 4836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 4846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(fNorm2->hasBoundaryBefore(c=text->next32PostInc())) { 4856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org text->move32(-1, CharacterIterator::kCurrent); 4866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 4876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org segment.append(c); 4896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nextIndex=text->getIndex(); 4916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode errorCode=U_ZERO_ERROR; 4926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fNorm2->normalize(segment, buffer, errorCode); 4936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return U_SUCCESS(errorCode) && !buffer.isEmpty(); 4946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool 4976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgNormalizer::previousNormalize() { 4986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org clearBuffer(); 4996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nextIndex=currentIndex; 5006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org text->setIndex(currentIndex); 5016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!text->hasPrevious()) { 5026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 5036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString segment; 5056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(text->hasPrevious()) { 5066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c=text->previous32(); 5076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org segment.insert(0, c); 5086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(fNorm2->hasBoundaryBefore(c)) { 5096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 5106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org currentIndex=text->getIndex(); 5136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode errorCode=U_ZERO_ERROR; 5146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fNorm2->normalize(segment, buffer, errorCode); 5156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bufferPos=buffer.length(); 5166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return U_SUCCESS(errorCode) && !buffer.isEmpty(); 5176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 5186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_END 5206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif /* #if !UCONFIG_NO_NORMALIZATION */ 522