/*
******************************************************************************
* Copyright (c) 1996-2011, International Business Machines
* Corporation and others. All Rights Reserved.
******************************************************************************
* File unorm.cpp
*
* Created by: Vladimir Weinstein 12052000
*
* Modification history :
*
* Date        Name        Description
* 02/01/01    synwee      Added normalization quickcheck enum and method.
* 02/12/01    synwee      Commented out quickcheck util api has been approved
*                         Added private method for doing FCD checks
* 02/23/01    synwee      Modified quickcheck and checkFCE to run through
*                         string for codepoints < 0x300 for the normalization
*                         mode NFC.
* 05/25/01+   Markus Scherer total rewrite, implement all normalization here
*                         instead of just wrappers around normlzr.cpp,
*                         load unorm.dat, support Unicode 3.1 with
*                         supplementary code points, etc.
* 2009-nov..2010-jan  Markus Scherer  total rewrite, new Normalizer2 API & code
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_NORMALIZATION

#include "unicode/udata.h"
#include "unicode/ustring.h"
#include "unicode/uiter.h"
#include "unicode/unorm.h"
#include "unicode/unorm2.h"
#include "normalizer2impl.h"
#include "unormimp.h"
#include "uprops.h"
#include "ustr_imp.h"

#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_USE 436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* quick check functions ---------------------------------------------------- */ 456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UNormalizationCheckResult U_EXPORT2 476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgunorm_quickCheck(const UChar *src, 486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t srcLength, 496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UNormalizationMode mode, 506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); 526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return unorm2_quickCheck((const UNormalizer2 *)n2, src, srcLength, pErrorCode); 536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UNormalizationCheckResult U_EXPORT2 566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgunorm_quickCheckWithOptions(const UChar *src, int32_t srcLength, 576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UNormalizationMode mode, int32_t options, 586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); 606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(options&UNORM_UNICODE_3_2) { 616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*pErrorCode)); 
626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return unorm2_quickCheck( 636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reinterpret_cast<const UNormalizer2 *>(static_cast<Normalizer2 *>(&fn2)), 646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org src, srcLength, pErrorCode); 656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return unorm2_quickCheck((const UNormalizer2 *)n2, src, srcLength, pErrorCode); 676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UBool U_EXPORT2 716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgunorm_isNormalized(const UChar *src, int32_t srcLength, 726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UNormalizationMode mode, 736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); 756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return unorm2_isNormalized((const UNormalizer2 *)n2, src, srcLength, pErrorCode); 766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UBool U_EXPORT2 796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgunorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength, 806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UNormalizationMode mode, int32_t options, 816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); 
836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(options&UNORM_UNICODE_3_2) { 846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*pErrorCode)); 856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return unorm2_isNormalized( 866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reinterpret_cast<const UNormalizer2 *>(static_cast<Normalizer2 *>(&fn2)), 876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org src, srcLength, pErrorCode); 886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return unorm2_isNormalized((const UNormalizer2 *)n2, src, srcLength, pErrorCode); 906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* normalize() API ---------------------------------------------------------- */ 946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** Public API for normalizing. 
*/ 966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgunorm_normalize(const UChar *src, int32_t srcLength, 986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UNormalizationMode mode, int32_t options, 996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *dest, int32_t destCapacity, 1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); 1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(options&UNORM_UNICODE_3_2) { 1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*pErrorCode)); 1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return unorm2_normalize( 1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reinterpret_cast<const UNormalizer2 *>(static_cast<Normalizer2 *>(&fn2)), 1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org src, srcLength, dest, destCapacity, pErrorCode); 1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return unorm2_normalize((const UNormalizer2 *)n2, 1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org src, srcLength, dest, destCapacity, pErrorCode); 1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* iteration functions ------------------------------------------------------ */ 1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int32_t 
1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org_iterate(UCharIterator *src, UBool forward, 1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *dest, int32_t destCapacity, 1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const Normalizer2 *n2, 1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool doNormalize, UBool *pNeededToNormalize, 1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*pErrorCode)) { 1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(destCapacity<0 || (dest==NULL && destCapacity>0) || src==NULL) { 1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(pNeededToNormalize!=NULL) { 1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pNeededToNormalize=FALSE; 1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!(forward ? 
src->hasNext(src) : src->hasPrevious(src))) { 1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return u_terminateUChars(dest, destCapacity, 0, pErrorCode); 1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString buffer; 1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(forward) { 1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* get one character and ignore its properties */ 1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buffer.append(uiter_next32(src)); 1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* get all following characters until we see a boundary */ 1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while((c=uiter_next32(src))>=0) { 1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(n2->hasBoundaryBefore(c)) { 1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* back out the latest movement to stop at the boundary */ 1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org src->move(src, -U16_LENGTH(c), UITER_CURRENT); 1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buffer.append(c); 1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while((c=uiter_previous32(src))>=0) { 1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* always write this character to the front of the buffer */ 1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buffer.insert(0, c); 
1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* stop if this just-copied character is a boundary */ 1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(n2->hasBoundaryBefore(c)) { 1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString destString(dest, 0, destCapacity); 1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buffer.length()>0 && doNormalize) { 1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n2->normalize(buffer, destString, *pErrorCode).extract(dest, destCapacity, *pErrorCode); 1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(pNeededToNormalize!=NULL && U_SUCCESS(*pErrorCode)) { 1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pNeededToNormalize= destString!=buffer; 1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return destString.length(); 1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* just copy the source characters */ 1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return buffer.extract(dest, destCapacity, *pErrorCode); 1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int32_t 1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgunorm_iterate(UCharIterator *src, UBool forward, 1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *dest, int32_t 
destCapacity, 1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UNormalizationMode mode, int32_t options, 1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool doNormalize, UBool *pNeededToNormalize, 1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); 1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(options&UNORM_UNICODE_3_2) { 1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeSet *uni32 = uniset_getUnicode32Instance(*pErrorCode); 1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*pErrorCode)) { 1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FilteredNormalizer2 fn2(*n2, *uni32); 1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return _iterate(src, forward, dest, destCapacity, 1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org &fn2, doNormalize, pNeededToNormalize, pErrorCode); 1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return _iterate(src, forward, dest, destCapacity, 1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org n2, doNormalize, pNeededToNormalize, pErrorCode); 1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgunorm_previous(UCharIterator *src, 1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *dest, int32_t destCapacity, 1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UNormalizationMode mode, int32_t 
options, 2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool doNormalize, UBool *pNeededToNormalize, 2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return unorm_iterate(src, FALSE, 2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest, destCapacity, 2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mode, options, 2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org doNormalize, pNeededToNormalize, 2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pErrorCode); 2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgunorm_next(UCharIterator *src, 2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *dest, int32_t destCapacity, 2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UNormalizationMode mode, int32_t options, 2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool doNormalize, UBool *pNeededToNormalize, 2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return unorm_iterate(src, TRUE, 2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest, destCapacity, 2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mode, options, 2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org doNormalize, pNeededToNormalize, 2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pErrorCode); 2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Concatenation of normalized strings -------------------------------------- */ 
2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int32_t 2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org_concatenate(const UChar *left, int32_t leftLength, 2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *right, int32_t rightLength, 2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *dest, int32_t destCapacity, 2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const Normalizer2 *n2, 2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*pErrorCode)) { 2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(destCapacity<0 || (dest==NULL && destCapacity>0) || 2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org left==NULL || leftLength<-1 || right==NULL || rightLength<-1) { 2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* check for overlapping right and destination */ 2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( dest!=NULL && 2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ((right>=dest && right<(dest+destCapacity)) || 2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (rightLength>0 && dest>=right && dest<(right+rightLength))) 2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 
2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* allow left==dest */ 2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UnicodeString destString; 2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(left==dest) { 2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org destString.setTo(dest, leftLength, destCapacity); 2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org destString.setTo(dest, 0, destCapacity); 2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org destString.append(left, leftLength); 2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return n2->append(destString, UnicodeString(rightLength<0, right, rightLength), *pErrorCode). 
2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org extract(dest, destCapacity, *pErrorCode); 2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgunorm_concatenate(const UChar *left, int32_t leftLength, 2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *right, int32_t rightLength, 2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *dest, int32_t destCapacity, 2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UNormalizationMode mode, int32_t options, 2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); 2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(options&UNORM_UNICODE_3_2) { 2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UnicodeSet *uni32 = uniset_getUnicode32Instance(*pErrorCode); 2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*pErrorCode)) { 2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FilteredNormalizer2 fn2(*n2, *uni32); 2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return _concatenate(left, leftLength, right, rightLength, 2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest, destCapacity, &fn2, pErrorCode); 2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return _concatenate(left, leftLength, right, rightLength, 2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dest, destCapacity, n2, pErrorCode); 
2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif /* #if !UCONFIG_NO_NORMALIZATION */ 281