1f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/* 2f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ************************************************************************* 3f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * COPYRIGHT: 4f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Copyright (c) 1996-2010, International Business Machines Corporation and 5f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * others. All Rights Reserved. 6f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) ************************************************************************* 7f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 8f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 9f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/utypes.h" 10f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 11f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#if !UCONFIG_NO_NORMALIZATION 12f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 13f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uniset.h" 14f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/unistr.h" 15f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/chariter.h" 16f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/schriter.h" 17f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/uchriter.h" 18f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "unicode/normlzr.h" 19f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "cmemory.h" 20f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "normalizer2impl.h" 21f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#include "uprops.h" // for uniset_getUnicode32Instance() 22f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 23f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_BEGIN 24f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 25f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer) 26f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 27f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------- 28f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Constructors and other boilerplate 29f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------- 30f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 31f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode) : 32f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0), 33f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) text(new StringCharacterIterator(str)), 34f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) currentIndex(0), nextIndex(0), 35f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) buffer(), bufferPos(0) 36f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 37f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) init(); 38f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 39f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 40f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::Normalizer(const UChar *str, int32_t length, UNormalizationMode mode) : 41f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0), 42f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) text(new UCharCharacterIterator(str, length)), 43f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) currentIndex(0), nextIndex(0), 44f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) buffer(), bufferPos(0) 45f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 46f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) init(); 47f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 48f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 49f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode) : 50f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0), 51f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) text(iter.clone()), 52f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) currentIndex(0), nextIndex(0), 53f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) buffer(), bufferPos(0) 54f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 55f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) init(); 56f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 57f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 58f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::Normalizer(const Normalizer ©) : 59f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UObject(copy), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(copy.fUMode), fOptions(copy.fOptions), 60f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) text(copy.text->clone()), 61f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) currentIndex(copy.currentIndex), nextIndex(copy.nextIndex), 62f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) buffer(copy.buffer), bufferPos(copy.bufferPos) 63f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 64f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) init(); 65f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 66f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 67f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)static const UChar _NUL=0; 68f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 69f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void 70f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::init() { 71f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode errorCode=U_ZERO_ERROR; 72f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNorm2=Normalizer2Factory::getInstance(fUMode, errorCode); 73f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(fOptions&UNORM_UNICODE_3_2) { 74f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fFilteredNorm2; 75f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNorm2=fFilteredNorm2= 76f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) new FilteredNormalizer2(*fNorm2, *uniset_getUnicode32Instance(errorCode)); 77f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 78f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_FAILURE(errorCode)) { 79f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errorCode=U_ZERO_ERROR; 80f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNorm2=Normalizer2Factory::getNoopInstance(errorCode); 81f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 82f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 83f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 84f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::~Normalizer() 85f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 86f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete fFilteredNorm2; 87f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete text; 88f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 89f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 90f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer* 91f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::clone() const 92f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 93f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return new Normalizer(*this); 94f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 95f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 96f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 97f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Generates a hash code for this iterator. 98f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 99f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t Normalizer::hashCode() const 100f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 101f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return text->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex; 102f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 103f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 104f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool Normalizer::operator==(const Normalizer& that) const 105f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 106f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return 107f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) this==&that || 108f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) (fUMode==that.fUMode && 109f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOptions==that.fOptions && 110f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *text==*that.text && 111f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) buffer==that.buffer && 112f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bufferPos==that.bufferPos && 113f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nextIndex==that.nextIndex); 114f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 115f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 116f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------- 117f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Static utility methods 118f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------- 119f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 120f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void U_EXPORT2 121f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::normalize(const UnicodeString& source, 122f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UNormalizationMode mode, int32_t options, 123f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString& result, 124f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode &status) { 125f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(source.isBogus() || U_FAILURE(status)) { 126f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result.setToBogus(); 127f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_SUCCESS(status)) { 128f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status=U_ILLEGAL_ARGUMENT_ERROR; 129f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 130f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 131f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString localDest; 132f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString *dest; 133f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 134f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(&source!=&result) { 135f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dest=&result; 136f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 137f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // the source and result strings are the same object, use a temporary one 138f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dest=&localDest; 139f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 140f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status); 141f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_SUCCESS(status)) { 142f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(options&UNORM_UNICODE_3_2) { 143f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)). 144f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) normalize(source, *dest, status); 145f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 146f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) n2->normalize(source, *dest, status); 147f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 148f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 149f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(dest==&localDest && U_SUCCESS(status)) { 150f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result=*dest; 151f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 152f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 153f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 154f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 155f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void U_EXPORT2 156f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::compose(const UnicodeString& source, 157f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool compat, int32_t options, 158f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString& result, 159f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode &status) { 160f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) normalize(source, compat ? UNORM_NFKC : UNORM_NFC, options, result, status); 161f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 162f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 163f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void U_EXPORT2 164f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::decompose(const UnicodeString& source, 165f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool compat, int32_t options, 166f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString& result, 167f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode &status) { 168f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) normalize(source, compat ? UNORM_NFKD : UNORM_NFD, options, result, status); 169f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 170f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 171f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UNormalizationCheckResult 172f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::quickCheck(const UnicodeString& source, 173f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UNormalizationMode mode, int32_t options, 174f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode &status) { 175f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status); 176f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_SUCCESS(status)) { 177f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(options&UNORM_UNICODE_3_2) { 178f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)). 179f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) quickCheck(source, status); 180f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 181f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return n2->quickCheck(source, status); 182f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 183f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 184f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return UNORM_MAYBE; 185f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 186f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 187f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 188f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool 189f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::isNormalized(const UnicodeString& source, 190f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UNormalizationMode mode, int32_t options, 191f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode &status) { 192f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status); 193f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_SUCCESS(status)) { 194f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(options&UNORM_UNICODE_3_2) { 195f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)). 196f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) isNormalized(source, status); 197f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 198f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return n2->isNormalized(source, status); 199f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 200f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 201f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FALSE; 202f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 203f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 204f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 205f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UnicodeString & U_EXPORT2 206f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::concatenate(UnicodeString &left, UnicodeString &right, 207f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString &result, 208f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UNormalizationMode mode, int32_t options, 209f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode &errorCode) { 210f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(left.isBogus() || right.isBogus() || U_FAILURE(errorCode)) { 211f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result.setToBogus(); 212f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_SUCCESS(errorCode)) { 213f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) errorCode=U_ILLEGAL_ARGUMENT_ERROR; 214f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 215f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 216f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString localDest; 217f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString *dest; 218f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 219f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(&right!=&result) { 220f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dest=&result; 221f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 222f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // the right and result strings are the same object, use a temporary one 223f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) dest=&localDest; 224f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 225f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) *dest=left; 226f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, errorCode); 227f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(U_SUCCESS(errorCode)) { 228f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(options&UNORM_UNICODE_3_2) { 229f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(errorCode)). 230f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) append(*dest, right, errorCode); 231f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 232f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) n2->append(*dest, right, errorCode); 233f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 234f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 235f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(dest==&localDest && U_SUCCESS(errorCode)) { 236f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) result=*dest; 237f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 238f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 239f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return result; 240f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 241f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 242f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------- 243f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Iteration API 244f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------- 245f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 246f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 247f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Return the current character in the normalized text. 248f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 249f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UChar32 Normalizer::current() { 250f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(bufferPos<buffer.length() || nextNormalize()) { 251f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return buffer.char32At(bufferPos); 252f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 253f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return DONE; 254f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 255f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 256f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 257f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 258f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Return the next character in the normalized text and advance 259f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the iteration position by one. If the end 260f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * of the text has already been reached, {@link #DONE} is returned. 261f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 262f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UChar32 Normalizer::next() { 263f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(bufferPos<buffer.length() || nextNormalize()) { 264f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c=buffer.char32At(bufferPos); 265f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bufferPos+=UTF_CHAR_LENGTH(c); 266f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return c; 267f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 268f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return DONE; 269f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 270f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 271f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 272f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 273f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Return the previous character in the normalized text and decrement 274f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the iteration position by one. If the beginning 275f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * of the text has already been reached, {@link #DONE} is returned. 276f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 277f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UChar32 Normalizer::previous() { 278f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(bufferPos>0 || previousNormalize()) { 279f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c=buffer.char32At(bufferPos-1); 280f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bufferPos-=UTF_CHAR_LENGTH(c); 281f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return c; 282f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 283f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return DONE; 284f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 285f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 286f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 287f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void Normalizer::reset() { 288f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) currentIndex=nextIndex=text->setToStart(); 289f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) clearBuffer(); 290f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 291f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 292f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void 293f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::setIndexOnly(int32_t index) { 294f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) text->setIndex(index); // pins index 295f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) currentIndex=nextIndex=text->getIndex(); 296f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) clearBuffer(); 297f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 298f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 299f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 300f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Return the first character in the normalized text. This resets 301f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the <tt>Normalizer's</tt> position to the beginning of the text. 302f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 303f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UChar32 Normalizer::first() { 304f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) reset(); 305f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return next(); 306f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 307f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 308f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 309f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Return the last character in the normalized text. This resets 310f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the <tt>Normalizer's</tt> position to be just before the 311f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the input text corresponding to that normalized character. 312f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 313f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UChar32 Normalizer::last() { 314f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) currentIndex=nextIndex=text->setToEnd(); 315f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) clearBuffer(); 316f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return previous(); 317f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 318f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 319f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 320f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Retrieve the current iteration position in the input text that is 321f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * being normalized. This method is useful in applications such as 322f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * searching, where you need to be able to determine the position in 323f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * the input text that corresponds to a given normalized output character. 324f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <p> 325f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <b>Note:</b> This method sets the position in the <em>input</em>, while 326f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * {@link #next} and {@link #previous} iterate through characters in the 327f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <em>output</em>. This means that there is not necessarily a one-to-one 328f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * correspondence between characters returned by <tt>next</tt> and 329f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <tt>previous</tt> and the indices passed to and returned from 330f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * <tt>setIndex</tt> and {@link #getIndex}. 331f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * 332f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 333f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t Normalizer::getIndex() const { 334f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(bufferPos<buffer.length()) { 335f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return currentIndex; 336f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 337f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return nextIndex; 338f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 339f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 340f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 341f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 342f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Retrieve the index of the start of the input text. This is the begin index 343f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt> 344f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * over which this <tt>Normalizer</tt> is iterating 345f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 346f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t Normalizer::startIndex() const { 347f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return text->startIndex(); 348f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 349f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 350f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 351f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Retrieve the index of the end of the input text. This is the end index 352f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt> 353f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * over which this <tt>Normalizer</tt> is iterating 354f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 355f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)int32_t Normalizer::endIndex() const { 356f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return text->endIndex(); 357f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 358f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 359f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------- 360f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Property access methods 361f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------- 362f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 363f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void 364f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::setMode(UNormalizationMode newMode) 365f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 366f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fUMode = newMode; 367f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) init(); 368f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 369f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 370f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UNormalizationMode 371f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::getUMode() const 372f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 373f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return fUMode; 374f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 375f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 376f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void 377f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::setOption(int32_t option, 378f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UBool value) 379f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 380f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (value) { 381f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOptions |= option; 382f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } else { 383f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fOptions &= (~option); 384f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 385f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) init(); 386f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 387f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 388f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool 389f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::getOption(int32_t option) const 390f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 391f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return (fOptions & option) != 0; 392f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 393f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 394f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 395f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Set the input text over which this <tt>Normalizer</tt> will iterate. 396f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The iteration position is set to the beginning of the input text. 397f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 398f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void 399f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::setText(const UnicodeString& newText, 400f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode &status) 401f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 402f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 403f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 404f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 405f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CharacterIterator *newIter = new StringCharacterIterator(newText); 406f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (newIter == NULL) { 407f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_MEMORY_ALLOCATION_ERROR; 408f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 409f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 410f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete text; 411f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) text = newIter; 412f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) reset(); 413f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 414f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 415f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 416f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Set the input text over which this <tt>Normalizer</tt> will iterate. 417f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * The iteration position is set to the beginning of the string. 418f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 419f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void 420f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::setText(const CharacterIterator& newText, 421f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode &status) 422f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 423f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 424f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 425f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 426f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CharacterIterator *newIter = newText.clone(); 427f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (newIter == NULL) { 428f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_MEMORY_ALLOCATION_ERROR; 429f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 430f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 431f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete text; 432f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) text = newIter; 433f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) reset(); 434f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 435f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 436f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void 437f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::setText(const UChar* newText, 438f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) int32_t length, 439f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode &status) 440f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 441f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (U_FAILURE(status)) { 442f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 443f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 444f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) CharacterIterator *newIter = new UCharCharacterIterator(newText, length); 445f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if (newIter == NULL) { 446f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) status = U_MEMORY_ALLOCATION_ERROR; 447f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return; 448f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 449f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) delete text; 450f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) text = newIter; 451f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) reset(); 452f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 453f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 454f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)/** 455f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * Copies the text under iteration into the UnicodeString referred to by "result". 456f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) * @param result Receives a copy of the text under iteration. 457f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) */ 458f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void 459f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::getText(UnicodeString& result) 460f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles){ 461f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) text->getText(result); 462f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 463f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 464f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------- 465f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)// Private utility methods 466f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)//------------------------------------------------------------------------- 467f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 468f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)void Normalizer::clearBuffer() { 469f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) buffer.remove(); 470f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bufferPos=0; 471f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 472f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 473f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool 474f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::nextNormalize() { 475f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) clearBuffer(); 476f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) currentIndex=nextIndex; 477f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) text->setIndex(nextIndex); 478f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!text->hasNext()) { 479f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FALSE; 480f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 481f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) // Skip at least one character so we make progress. 482f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString segment(text->next32PostInc()); 483f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(text->hasNext()) { 484f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c; 485f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(fNorm2->hasBoundaryBefore(c=text->next32PostInc())) { 486f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) text->move32(-1, CharacterIterator::kCurrent); 487f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 488f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 489f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) segment.append(c); 490f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 491f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nextIndex=text->getIndex(); 492f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode errorCode=U_ZERO_ERROR; 493f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNorm2->normalize(segment, buffer, errorCode); 494f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return U_SUCCESS(errorCode) && !buffer.isEmpty(); 495f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 496f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 497f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)UBool 498f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)Normalizer::previousNormalize() { 499f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) clearBuffer(); 500f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) nextIndex=currentIndex; 501f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) text->setIndex(currentIndex); 502f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(!text->hasPrevious()) { 503f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return FALSE; 504f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 505f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UnicodeString segment; 506f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) while(text->hasPrevious()) { 507f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UChar32 c=text->previous32(); 508f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) segment.insert(0, c); 509f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) if(fNorm2->hasBoundaryBefore(c)) { 510f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) break; 511f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 512f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) } 513f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) currentIndex=text->getIndex(); 514f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) UErrorCode errorCode=U_ZERO_ERROR; 515f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) fNorm2->normalize(segment, buffer, errorCode); 516f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) bufferPos=buffer.length(); 517f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) return U_SUCCESS(errorCode) && !buffer.isEmpty(); 518f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)} 519f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 520f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)U_NAMESPACE_END 521f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles) 522f4ed1cf5d184064c4cf0e4359c6d5d8aadb50afaTorne (Richard Coles)#endif /* #if !UCONFIG_NO_NORMALIZATION */ 523