/*
******************************************************************************
* Copyright (c) 1996-2011, International Business Machines
* Corporation and others. All Rights Reserved.
******************************************************************************
* File unorm.cpp
*
* Created by: Vladimir Weinstein 12052000
*
* Modification history :
*
* Date        Name        Description
* 02/01/01    synwee      Added normalization quickcheck enum and method.
* 02/12/01    synwee      Commented out quickcheck util api has been approved
*                         Added private method for doing FCD checks
* 02/23/01    synwee      Modified quickcheck and checkFCE to run through
*                         string for codepoints < 0x300 for the normalization
*                         mode NFC.
* 05/25/01+   Markus Scherer total rewrite, implement all normalization here
*                         instead of just wrappers around normlzr.cpp,
*                         load unorm.dat, support Unicode 3.1 with
*                         supplementary code points, etc.
* 2009-nov..2010-jan  Markus Scherer  total rewrite, new Normalizer2 API & code
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_NORMALIZATION

#include "unicode/udata.h"
#include "unicode/ustring.h"
#include "unicode/uiter.h"
#include "unicode/unorm.h"
#include "unicode/unorm2.h"
#include "normalizer2impl.h"
#include "unormimp.h"
#include "uprops.h"
#include "ustr_imp.h"

#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_USE 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/* quick check functions ---------------------------------------------------- */ 4550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UNormalizationCheckResult U_EXPORT2 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruunorm_quickCheck(const UChar *src, 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcLength, 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UNormalizationMode mode, 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 5150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); 5250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return unorm2_quickCheck((const UNormalizer2 *)n2, src, srcLength, pErrorCode); 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UNormalizationCheckResult U_EXPORT2 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruunorm_quickCheckWithOptions(const UChar *src, int32_t srcLength, 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UNormalizationMode mode, int32_t options, 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 5950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); 6050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(options&UNORM_UNICODE_3_2) { 6150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*pErrorCode)); 6227f654740f2a26ad62a5c155af9199af9e69b889claireho return unorm2_quickCheck( 
6327f654740f2a26ad62a5c155af9199af9e69b889claireho reinterpret_cast<const UNormalizer2 *>(static_cast<Normalizer2 *>(&fn2)), 6427f654740f2a26ad62a5c155af9199af9e69b889claireho src, srcLength, pErrorCode); 6550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 6650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return unorm2_quickCheck((const UNormalizer2 *)n2, src, srcLength, pErrorCode); 6750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UBool U_EXPORT2 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruunorm_isNormalized(const UChar *src, int32_t srcLength, 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UNormalizationMode mode, 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 7450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); 7550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return unorm2_isNormalized((const UNormalizer2 *)n2, src, srcLength, pErrorCode); 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI UBool U_EXPORT2 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruunorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength, 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UNormalizationMode mode, int32_t options, 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 8250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); 8350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(options&UNORM_UNICODE_3_2) { 8450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*pErrorCode)); 8527f654740f2a26ad62a5c155af9199af9e69b889claireho return unorm2_isNormalized( 8627f654740f2a26ad62a5c155af9199af9e69b889claireho reinterpret_cast<const UNormalizer2 *>(static_cast<Normalizer2 *>(&fn2)), 8727f654740f2a26ad62a5c155af9199af9e69b889claireho src, srcLength, pErrorCode); 8850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 8950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return unorm2_isNormalized((const UNormalizer2 *)n2, src, srcLength, pErrorCode); 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 9350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho/* normalize() API ---------------------------------------------------------- */ 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** Public API for normalizing. 
*/ 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruunorm_normalize(const UChar *src, int32_t srcLength, 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UNormalizationMode mode, int32_t options, 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *dest, int32_t destCapacity, 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 10150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); 10250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(options&UNORM_UNICODE_3_2) { 10350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*pErrorCode)); 10427f654740f2a26ad62a5c155af9199af9e69b889claireho return unorm2_normalize( 10527f654740f2a26ad62a5c155af9199af9e69b889claireho reinterpret_cast<const UNormalizer2 *>(static_cast<Normalizer2 *>(&fn2)), 10650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho src, srcLength, dest, destCapacity, pErrorCode); 10750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 10850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return unorm2_normalize((const UNormalizer2 *)n2, 10950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho src, srcLength, dest, destCapacity, pErrorCode); 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* iteration functions ------------------------------------------------------ */ 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 11650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic int32_t 11783a171d1a62abf406f7f44ae671823d5ec20db7dCraig 
Cornelius_iterate(UCharIterator *src, UBool forward, 11850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *dest, int32_t destCapacity, 11983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const Normalizer2 *n2, 12050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool doNormalize, UBool *pNeededToNormalize, 12150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *pErrorCode) { 122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(U_FAILURE(*pErrorCode)) { 123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 12583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(destCapacity<0 || (dest==NULL && destCapacity>0) || src==NULL) { 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(pNeededToNormalize!=NULL) { 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pNeededToNormalize=FALSE; 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 13350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(!(forward ? 
src->hasNext(src) : src->hasPrevious(src))) { 13450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return u_terminateUChars(dest, destCapacity, 0, pErrorCode); 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 13750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString buffer; 13850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar32 c; 13950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(forward) { 14050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* get one character and ignore its properties */ 14150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho buffer.append(uiter_next32(src)); 14250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* get all following characters until we see a boundary */ 14350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while((c=uiter_next32(src))>=0) { 14450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(n2->hasBoundaryBefore(c)) { 14550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* back out the latest movement to stop at the boundary */ 14650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho src->move(src, -U16_LENGTH(c), UITER_CURRENT); 14750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 14850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 14950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho buffer.append(c); 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 15350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho while((c=uiter_previous32(src))>=0) { 15450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* always write this character to the front of the buffer */ 15550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho buffer.insert(0, c); 15650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* stop if this just-copied character is a boundary */ 15750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
if(n2->hasBoundaryBefore(c)) { 15850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho break; 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 16350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString destString(dest, 0, destCapacity); 16450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(buffer.length()>0 && doNormalize) { 16550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho n2->normalize(buffer, destString, *pErrorCode).extract(dest, destCapacity, *pErrorCode); 16650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(pNeededToNormalize!=NULL && U_SUCCESS(*pErrorCode)) { 16750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho *pNeededToNormalize= destString!=buffer; 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 16950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return destString.length(); 17050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 17150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho /* just copy the source characters */ 17250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return buffer.extract(dest, destCapacity, *pErrorCode); 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 17450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 17683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusstatic int32_t 17783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusunorm_iterate(UCharIterator *src, UBool forward, 17883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UChar *dest, int32_t destCapacity, 17983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UNormalizationMode mode, int32_t options, 18083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UBool doNormalize, UBool *pNeededToNormalize, 18183a171d1a62abf406f7f44ae671823d5ec20db7dCraig 
Cornelius UErrorCode *pErrorCode) { 18283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); 18383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(options&UNORM_UNICODE_3_2) { 18483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const UnicodeSet *uni32 = uniset_getUnicode32Instance(*pErrorCode); 18583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(U_FAILURE(*pErrorCode)) { 18683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return 0; 18783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 18883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius FilteredNormalizer2 fn2(*n2, *uni32); 18983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return _iterate(src, forward, dest, destCapacity, 19083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius &fn2, doNormalize, pNeededToNormalize, pErrorCode); 19183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 19283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return _iterate(src, forward, dest, destCapacity, 19383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius n2, doNormalize, pNeededToNormalize, pErrorCode); 19483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius} 19583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 19650294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoU_CAPI int32_t U_EXPORT2 19750294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehounorm_previous(UCharIterator *src, 19850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UChar *dest, int32_t destCapacity, 19950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UNormalizationMode mode, int32_t options, 20050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UBool doNormalize, UBool *pNeededToNormalize, 20150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *pErrorCode) { 20250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return unorm_iterate(src, FALSE, 20350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest, destCapacity, 
20450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho mode, options, 20550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho doNormalize, pNeededToNormalize, 20650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pErrorCode); 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruunorm_next(UCharIterator *src, 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *dest, int32_t destCapacity, 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UNormalizationMode mode, int32_t options, 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool doNormalize, UBool *pNeededToNormalize, 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 21550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return unorm_iterate(src, TRUE, 21650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dest, destCapacity, 21750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho mode, options, 21850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho doNormalize, pNeededToNormalize, 21950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pErrorCode); 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Concatenation of normalized strings -------------------------------------- */ 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 22483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusstatic int32_t 22583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius_concatenate(const UChar *left, int32_t leftLength, 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *right, int32_t rightLength, 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *dest, 
int32_t destCapacity, 22883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const Normalizer2 *n2, 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode *pErrorCode) { 230b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(U_FAILURE(*pErrorCode)) { 231b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 232b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 23383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(destCapacity<0 || (dest==NULL && destCapacity>0) || 23483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius left==NULL || leftLength<-1 || right==NULL || rightLength<-1) { 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* check for overlapping right and destination */ 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( dest!=NULL && 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((right>=dest && right<(dest+destCapacity)) || 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (rightLength>0 && dest>=right && dest<(right+rightLength))) 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* allow left==dest */ 24950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString destString; 25050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(left==dest) { 
25150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destString.setTo(dest, leftLength, destCapacity); 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 25350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destString.setTo(dest, 0, destCapacity); 25450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho destString.append(left, leftLength); 255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 25650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return n2->append(destString, UnicodeString(rightLength<0, right, rightLength), *pErrorCode). 25750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho extract(dest, destCapacity, *pErrorCode); 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 26083a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusU_CAPI int32_t U_EXPORT2 26183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Corneliusunorm_concatenate(const UChar *left, int32_t leftLength, 26283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const UChar *right, int32_t rightLength, 26383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UChar *dest, int32_t destCapacity, 26483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UNormalizationMode mode, int32_t options, 26583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UErrorCode *pErrorCode) { 26683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); 26783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(options&UNORM_UNICODE_3_2) { 26883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius const UnicodeSet *uni32 = uniset_getUnicode32Instance(*pErrorCode); 26983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(U_FAILURE(*pErrorCode)) { 27083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return 0; 27183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 27283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 
FilteredNormalizer2 fn2(*n2, *uni32); 27383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return _concatenate(left, leftLength, right, rightLength, 27483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius dest, destCapacity, &fn2, pErrorCode); 27583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 27683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return _concatenate(left, leftLength, right, rightLength, 27783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius dest, destCapacity, n2, pErrorCode); 27883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius} 27983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_NORMALIZATION */ 281