1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ************************************************************************* 3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * COPYRIGHT: 4b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Copyright (c) 1996-2011, International Business Machines Corporation and 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * others. All Rights Reserved. 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ************************************************************************* 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_NORMALIZATION 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/uniset.h" 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/unistr.h" 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/chariter.h" 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/schriter.h" 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchriter.h" 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/normlzr.h" 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 2050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "normalizer2impl.h" 2150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "uprops.h" // for uniset_getUnicode32Instance() 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer) 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------- 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Constructors and other boilerplate 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------- 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode) : 3250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0), 3350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho text(new StringCharacterIterator(str)), 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru currentIndex(0), nextIndex(0), 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer(), bufferPos(0) 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 3750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho init(); 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::Normalizer(const UChar *str, int32_t length, UNormalizationMode mode) : 4150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0), 4250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho text(new UCharCharacterIterator(str, length)), 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru currentIndex(0), nextIndex(0), 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer(), bufferPos(0) 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 4650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho init(); 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode) : 5050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0), 5150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho text(iter.clone()), 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru currentIndex(0), nextIndex(0), 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer(), bufferPos(0) 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 5550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho init(); 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::Normalizer(const Normalizer ©) : 5950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UObject(copy), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(copy.fUMode), fOptions(copy.fOptions), 6050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho text(copy.text->clone()), 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru currentIndex(copy.currentIndex), nextIndex(copy.nextIndex), 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer(copy.buffer), bufferPos(copy.bufferPos) 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 6450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho init(); 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar _NUL=0; 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 7050294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer::init() { 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 7250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fNorm2=Normalizer2Factory::getInstance(fUMode, errorCode); 7350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(fOptions&UNORM_UNICODE_3_2) { 7450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete fFilteredNorm2; 7550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fNorm2=fFilteredNorm2= 7650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho new FilteredNormalizer2(*fNorm2, *uniset_getUnicode32Instance(errorCode)); 7750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 7850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(errorCode)) { 7950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errorCode=U_ZERO_ERROR; 8050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fNorm2=Normalizer2Factory::getNoopInstance(errorCode); 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::~Normalizer() 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 8650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete fFilteredNorm2; 8750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete text; 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer* 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::clone() const 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 9350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return new Normalizer(*this); 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Generates a hash code for this iterator. 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t Normalizer::hashCode() const 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 10150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return text->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex; 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool Normalizer::operator==(const Normalizer& that) const 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru this==&that || 10827f654740f2a26ad62a5c155af9199af9e69b889claireho (fUMode==that.fUMode && 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOptions==that.fOptions && 11050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *text==*that.text && 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer==that.buffer && 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bufferPos==that.bufferPos && 11327f654740f2a26ad62a5c155af9199af9e69b889claireho nextIndex==that.nextIndex); 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------- 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Static utility methods 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------- 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid U_EXPORT2 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::normalize(const UnicodeString& source, 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UNormalizationMode mode, int32_t options, 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& result, 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) { 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(source.isBogus() || U_FAILURE(status)) { 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.setToBogus(); 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_SUCCESS(status)) { 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status=U_ILLEGAL_ARGUMENT_ERROR; 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString localDest; 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString *dest; 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(&source!=&result) { 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest=&result; 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the source and result strings are the same object, use a temporary one 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest=&localDest; 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 14050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status); 14150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_SUCCESS(status)) { 14250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(options&UNORM_UNICODE_3_2) { 14350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)). 14450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normalize(source, *dest, status); 14550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 14650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n2->normalize(source, *dest, status); 14750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 14950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(dest==&localDest && U_SUCCESS(status)) { 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result=*dest; 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid U_EXPORT2 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::compose(const UnicodeString& source, 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool compat, int32_t options, 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& result, 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) { 16050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normalize(source, compat ? UNORM_NFKC : UNORM_NFC, options, result, status); 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid U_EXPORT2 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::decompose(const UnicodeString& source, 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool compat, int32_t options, 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& result, 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) { 16850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho normalize(source, compat ? UNORM_NFKD : UNORM_NFD, options, result, status); 16950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 17050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 17150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUNormalizationCheckResult 17250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer::quickCheck(const UnicodeString& source, 17350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UNormalizationMode mode, int32_t options, 17450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status) { 17550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status); 17650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_SUCCESS(status)) { 17750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(options&UNORM_UNICODE_3_2) { 17850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)). 17950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho quickCheck(source, status); 18050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 18150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return n2->quickCheck(source, status); 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 18450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return UNORM_MAYBE; 18550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 18650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 18850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoUBool 18950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoNormalizer::isNormalized(const UnicodeString& source, 19050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UNormalizationMode mode, int32_t options, 19150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode &status) { 19250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status); 19350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_SUCCESS(status)) { 19450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(options&UNORM_UNICODE_3_2) { 19550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)). 19650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho isNormalized(source, status); 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 19850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return n2->isNormalized(source, status); 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 20050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 20150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return FALSE; 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString & U_EXPORT2 206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoNormalizer::concatenate(const UnicodeString &left, const UnicodeString &right, 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString &result, 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UNormalizationMode mode, int32_t options, 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &errorCode) { 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(left.isBogus() || right.isBogus() || U_FAILURE(errorCode)) { 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.setToBogus(); 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_SUCCESS(errorCode)) { 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorCode=U_ILLEGAL_ARGUMENT_ERROR; 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString localDest; 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString *dest; 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 21950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(&right!=&result) { 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest=&result; 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 22250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // the right and result strings are the same object, use a temporary one 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest=&localDest; 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 22550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *dest=left; 22650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, errorCode); 22750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_SUCCESS(errorCode)) { 22850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(options&UNORM_UNICODE_3_2) { 22950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(errorCode)). 23050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho append(*dest, right, errorCode); 23150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 23250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n2->append(*dest, right, errorCode); 23350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 23550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(dest==&localDest && U_SUCCESS(errorCode)) { 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result=*dest; 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------- 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Iteration API 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------- 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Return the current character in the normalized text. 248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUChar32 Normalizer::current() { 250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(bufferPos<buffer.length() || nextNormalize()) { 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return buffer.char32At(bufferPos); 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return DONE; 254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Return the next character in the normalized text and advance 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the iteration position by one. If the end 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the text has already been reached, {@link #DONE} is returned. 261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUChar32 Normalizer::next() { 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(bufferPos<buffer.length() || nextNormalize()) { 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c=buffer.char32At(bufferPos); 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bufferPos+=UTF_CHAR_LENGTH(c); 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return c; 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return DONE; 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Return the previous character in the normalized text and decrement 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the iteration position by one. If the beginning 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the text has already been reached, {@link #DONE} is returned. 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUChar32 Normalizer::previous() { 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(bufferPos>0 || previousNormalize()) { 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c=buffer.char32At(bufferPos-1); 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bufferPos-=UTF_CHAR_LENGTH(c); 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return c; 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return DONE; 284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Normalizer::reset() { 28850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho currentIndex=nextIndex=text->setToStart(); 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru clearBuffer(); 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::setIndexOnly(int32_t index) { 29450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho text->setIndex(index); // pins index 29550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho currentIndex=nextIndex=text->getIndex(); 296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru clearBuffer(); 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 30050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Return the first character in the normalized text. This resets 30150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * the <tt>Normalizer's</tt> position to the beginning of the text. 302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUChar32 Normalizer::first() { 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reset(); 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return next(); 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 30950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Return the last character in the normalized text. This resets 310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the <tt>Normalizer's</tt> position to be just before the 311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the input text corresponding to that normalized character. 312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUChar32 Normalizer::last() { 31450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho currentIndex=nextIndex=text->setToEnd(); 315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru clearBuffer(); 316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return previous(); 317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Retrieve the current iteration position in the input text that is 321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * being normalized. This method is useful in applications such as 322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * searching, where you need to be able to determine the position in 323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the input text that corresponds to a given normalized output character. 324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p> 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <b>Note:</b> This method sets the position in the <em>input</em>, while 326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * {@link #next} and {@link #previous} iterate through characters in the 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <em>output</em>. This means that there is not necessarily a one-to-one 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * correspondence between characters returned by <tt>next</tt> and 329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <tt>previous</tt> and the indices passed to and returned from 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <tt>setIndex</tt> and {@link #getIndex}. 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t Normalizer::getIndex() const { 334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(bufferPos<buffer.length()) { 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return currentIndex; 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return nextIndex; 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 34250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Retrieve the index of the start of the input text. This is the begin index 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt> 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * over which this <tt>Normalizer</tt> is iterating 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t Normalizer::startIndex() const { 34750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return text->startIndex(); 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 35150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * Retrieve the index of the end of the input text. This is the end index 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt> 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * over which this <tt>Normalizer</tt> is iterating 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t Normalizer::endIndex() const { 35650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return text->endIndex(); 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------- 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Property access methods 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------- 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::setMode(UNormalizationMode newMode) 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fUMode = newMode; 36750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho init(); 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUNormalizationMode 371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::getUMode() const 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return fUMode; 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::setOption(int32_t option, 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool value) 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (value) { 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOptions |= option; 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fOptions &= (~option); 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 38550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho init(); 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::getOption(int32_t option) const 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (fOptions & option) != 0; 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Set the input text over which this <tt>Normalizer</tt> will iterate. 39650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho * The iteration position is set to the beginning of the input text. 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::setText(const UnicodeString& newText, 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CharacterIterator *newIter = new StringCharacterIterator(newText); 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (newIter == NULL) { 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 41050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete text; 41150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho text = newIter; 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reset(); 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Set the input text over which this <tt>Normalizer</tt> will iterate. 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The iteration position is set to the beginning of the string. 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::setText(const CharacterIterator& newText, 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CharacterIterator *newIter = newText.clone(); 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (newIter == NULL) { 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 43150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete text; 43250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho text = newIter; 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reset(); 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::setText(const UChar* newText, 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length, 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CharacterIterator *newIter = new UCharCharacterIterator(newText, length); 445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (newIter == NULL) { 446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 44950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete text; 45050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho text = newIter; 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru reset(); 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Copies the text under iteration into the UnicodeString referred to by "result". 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param result Receives a copy of the text under iteration. 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::getText(UnicodeString& result) 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 46150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho text->getText(result); 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------- 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Private utility methods 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------- 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Normalizer::clearBuffer() { 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buffer.remove(); 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bufferPos=0; 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::nextNormalize() { 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru clearBuffer(); 476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru currentIndex=nextIndex; 47750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho text->setIndex(nextIndex); 47850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(!text->hasNext()) { 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 48150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Skip at least one character so we make progress. 48250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString segment(text->next32PostInc()); 48350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(text->hasNext()) { 48450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 48550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(fNorm2->hasBoundaryBefore(c=text->next32PostInc())) { 48650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho text->move32(-1, CharacterIterator::kCurrent); 48750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 48850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 48950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho segment.append(c); 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 49150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho nextIndex=text->getIndex(); 49250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode errorCode=U_ZERO_ERROR; 49350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fNorm2->normalize(segment, buffer, errorCode); 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return U_SUCCESS(errorCode) && !buffer.isEmpty(); 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizer::previousNormalize() { 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru clearBuffer(); 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nextIndex=currentIndex; 50150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho text->setIndex(currentIndex); 50250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(!text->hasPrevious()) { 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 50550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString segment; 50650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while(text->hasPrevious()) { 50750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c=text->previous32(); 50850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho segment.insert(0, c); 50950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(fNorm2->hasBoundaryBefore(c)) { 51050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 51150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 51350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho currentIndex=text->getIndex(); 51450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode errorCode=U_ZERO_ERROR; 51550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fNorm2->normalize(segment, buffer, errorCode); 516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bufferPos=buffer.length(); 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return U_SUCCESS(errorCode) && !buffer.isEmpty(); 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_NORMALIZATION */ 523