1/* 2******************************************************************************* 3* 4* Copyright (C) 2009-2010, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7******************************************************************************* 8* file name: normalizer2.cpp 9* encoding: US-ASCII 10* tab size: 8 (not used) 11* indentation:4 12* 13* created on: 2009nov22 14* created by: Markus W. Scherer 15*/ 16 17#include "unicode/utypes.h" 18 19#if !UCONFIG_NO_NORMALIZATION 20 21#include "unicode/localpointer.h" 22#include "unicode/normalizer2.h" 23#include "unicode/unistr.h" 24#include "unicode/unorm.h" 25#include "cpputils.h" 26#include "cstring.h" 27#include "mutex.h" 28#include "normalizer2impl.h" 29#include "ucln_cmn.h" 30#include "uhash.h" 31 32U_NAMESPACE_BEGIN 33 34// Public API dispatch via Normalizer2 subclasses -------------------------- *** 35 36// Normalizer2 implementation for the old UNORM_NONE. 37class NoopNormalizer2 : public Normalizer2 { 38 virtual UnicodeString & 39 normalize(const UnicodeString &src, 40 UnicodeString &dest, 41 UErrorCode &errorCode) const { 42 if(U_SUCCESS(errorCode)) { 43 if(&dest!=&src) { 44 dest=src; 45 } else { 46 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 47 } 48 } 49 return dest; 50 } 51 virtual UnicodeString & 52 normalizeSecondAndAppend(UnicodeString &first, 53 const UnicodeString &second, 54 UErrorCode &errorCode) const { 55 if(U_SUCCESS(errorCode)) { 56 if(&first!=&second) { 57 first.append(second); 58 } else { 59 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 60 } 61 } 62 return first; 63 } 64 virtual UnicodeString & 65 append(UnicodeString &first, 66 const UnicodeString &second, 67 UErrorCode &errorCode) const { 68 if(U_SUCCESS(errorCode)) { 69 if(&first!=&second) { 70 first.append(second); 71 } else { 72 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 73 } 74 } 75 return first; 76 } 77 virtual UBool 78 isNormalized(const UnicodeString &, UErrorCode &) const { 79 return TRUE; 80 } 81 virtual UNormalizationCheckResult 82 quickCheck(const UnicodeString &, UErrorCode &) const { 83 return UNORM_YES; 84 } 85 virtual int32_t 86 spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const { 87 return s.length(); 88 } 89 virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; } 90 virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; } 91 virtual UBool isInert(UChar32) const { return TRUE; } 92 93 static UClassID U_EXPORT2 getStaticClassID(); 94 virtual UClassID getDynamicClassID() const; 95}; 96 97UOBJECT_DEFINE_RTTI_IMPLEMENTATION(NoopNormalizer2) 98 99// Intermediate class: 100// Has Normalizer2Impl and does boilerplate argument checking and setup. 101class Normalizer2WithImpl : public Normalizer2 { 102public: 103 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} 104 105 // normalize 106 virtual UnicodeString & 107 normalize(const UnicodeString &src, 108 UnicodeString &dest, 109 UErrorCode &errorCode) const { 110 if(U_FAILURE(errorCode)) { 111 dest.setToBogus(); 112 return dest; 113 } 114 const UChar *sArray=src.getBuffer(); 115 if(&dest==&src || sArray==NULL) { 116 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 117 dest.setToBogus(); 118 return dest; 119 } 120 dest.remove(); 121 ReorderingBuffer buffer(impl, dest); 122 if(buffer.init(src.length(), errorCode)) { 123 normalize(sArray, sArray+src.length(), buffer, errorCode); 124 } 125 return dest; 126 } 127 virtual void 128 normalize(const UChar *src, const UChar *limit, 129 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; 130 131 // normalize and append 132 virtual UnicodeString & 133 normalizeSecondAndAppend(UnicodeString &first, 134 const UnicodeString &second, 135 UErrorCode &errorCode) const { 136 return normalizeSecondAndAppend(first, second, TRUE, errorCode); 137 } 138 virtual UnicodeString & 139 append(UnicodeString &first, 140 const UnicodeString &second, 141 UErrorCode &errorCode) const { 142 return normalizeSecondAndAppend(first, second, FALSE, errorCode); 143 } 144 UnicodeString & 145 normalizeSecondAndAppend(UnicodeString &first, 146 const UnicodeString &second, 147 UBool doNormalize, 148 UErrorCode &errorCode) const { 149 uprv_checkCanGetBuffer(first, errorCode); 150 if(U_FAILURE(errorCode)) { 151 return first; 152 } 153 const UChar *secondArray=second.getBuffer(); 154 if(&first==&second || secondArray==NULL) { 155 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 156 return first; 157 } 158 ReorderingBuffer buffer(impl, first); 159 if(buffer.init(first.length()+second.length(), errorCode)) { 160 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, 161 buffer, errorCode); 162 } 163 return first; 164 } 165 virtual void 166 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 167 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; 168 169 // quick checks 170 virtual UBool 171 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { 172 if(U_FAILURE(errorCode)) { 173 return FALSE; 174 } 175 const UChar *sArray=s.getBuffer(); 176 if(sArray==NULL) { 177 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 178 return FALSE; 179 } 180 const UChar *sLimit=sArray+s.length(); 181 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); 182 } 183 virtual UNormalizationCheckResult 184 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { 185 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; 186 } 187 virtual int32_t 188 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { 189 if(U_FAILURE(errorCode)) { 190 return 0; 191 } 192 const UChar *sArray=s.getBuffer(); 193 if(sArray==NULL) { 194 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 195 return 0; 196 } 197 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); 198 } 199 virtual const UChar * 200 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0; 201 202 virtual UNormalizationCheckResult getQuickCheck(UChar32) const { 203 return UNORM_YES; 204 } 205 206 static UClassID U_EXPORT2 getStaticClassID(); 207 virtual UClassID getDynamicClassID() const; 208 209 const Normalizer2Impl &impl; 210}; 211 212UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer2WithImpl) 213 214class DecomposeNormalizer2 : public Normalizer2WithImpl { 215public: 216 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} 217 218private: 219 virtual void 220 normalize(const UChar *src, const UChar *limit, 221 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 222 impl.decompose(src, limit, &buffer, errorCode); 223 } 224 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 225 virtual void 226 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 227 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 228 impl.decomposeAndAppend(src, limit, doNormalize, buffer, errorCode); 229 } 230 virtual const UChar * 231 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { 232 return impl.decompose(src, limit, NULL, errorCode); 233 } 234 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 235 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { 236 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; 237 } 238 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); } 239 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); } 240 virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } 241}; 242 243class ComposeNormalizer2 : public Normalizer2WithImpl { 244public: 245 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : 246 Normalizer2WithImpl(ni), onlyContiguous(fcc) {} 247 248private: 249 virtual void 250 normalize(const UChar *src, const UChar *limit, 251 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 252 impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); 253 } 254 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 255 virtual void 256 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 257 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 258 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, buffer, errorCode); 259 } 260 261 virtual UBool 262 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { 263 if(U_FAILURE(errorCode)) { 264 return FALSE; 265 } 266 const UChar *sArray=s.getBuffer(); 267 if(sArray==NULL) { 268 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 269 return FALSE; 270 } 271 UnicodeString temp; 272 ReorderingBuffer buffer(impl, temp); 273 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization 274 return FALSE; 275 } 276 return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode); 277 } 278 virtual UNormalizationCheckResult 279 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { 280 if(U_FAILURE(errorCode)) { 281 return UNORM_MAYBE; 282 } 283 const UChar *sArray=s.getBuffer(); 284 if(sArray==NULL) { 285 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 286 return UNORM_MAYBE; 287 } 288 UNormalizationCheckResult qcResult=UNORM_YES; 289 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult); 290 return qcResult; 291 } 292 virtual const UChar * 293 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const { 294 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); 295 } 296 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 297 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { 298 return impl.getCompQuickCheck(impl.getNorm16(c)); 299 } 300 virtual UBool hasBoundaryBefore(UChar32 c) const { 301 return impl.hasCompBoundaryBefore(c); 302 } 303 virtual UBool hasBoundaryAfter(UChar32 c) const { 304 return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE); 305 } 306 virtual UBool isInert(UChar32 c) const { 307 return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE); 308 } 309 310 const UBool onlyContiguous; 311}; 312 313class FCDNormalizer2 : public Normalizer2WithImpl { 314public: 315 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} 316 317private: 318 virtual void 319 normalize(const UChar *src, const UChar *limit, 320 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 321 impl.makeFCD(src, limit, &buffer, errorCode); 322 } 323 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 324 virtual void 325 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 326 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 327 impl.makeFCDAndAppend(src, limit, doNormalize, buffer, errorCode); 328 } 329 virtual const UChar * 330 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { 331 return impl.makeFCD(src, limit, NULL, errorCode); 332 } 333 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 334 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); } 335 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); } 336 virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } 337}; 338 339// instance cache ---------------------------------------------------------- *** 340 341struct Norm2AllModes : public UMemory { 342 static Norm2AllModes *createInstance(const char *packageName, 343 const char *name, 344 UErrorCode &errorCode); 345 Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {} 346 347 Normalizer2Impl impl; 348 ComposeNormalizer2 comp; 349 DecomposeNormalizer2 decomp; 350 FCDNormalizer2 fcd; 351 ComposeNormalizer2 fcc; 352}; 353 354Norm2AllModes * 355Norm2AllModes::createInstance(const char *packageName, 356 const char *name, 357 UErrorCode &errorCode) { 358 if(U_FAILURE(errorCode)) { 359 return NULL; 360 } 361 LocalPointer<Norm2AllModes> allModes(new Norm2AllModes); 362 if(allModes.isNull()) { 363 errorCode=U_MEMORY_ALLOCATION_ERROR; 364 return NULL; 365 } 366 allModes->impl.load(packageName, name, errorCode); 367 return U_SUCCESS(errorCode) ? allModes.orphan() : NULL; 368} 369 370U_CDECL_BEGIN 371static UBool U_CALLCONV uprv_normalizer2_cleanup(); 372U_CDECL_END 373 374class Norm2AllModesSingleton : public TriStateSingletonWrapper<Norm2AllModes> { 375public: 376 Norm2AllModesSingleton(TriStateSingleton &s, const char *n) : 377 TriStateSingletonWrapper<Norm2AllModes>(s), name(n) {} 378 Norm2AllModes *getInstance(UErrorCode &errorCode) { 379 return TriStateSingletonWrapper<Norm2AllModes>::getInstance(createInstance, name, errorCode); 380 } 381private: 382 static void *createInstance(const void *context, UErrorCode &errorCode) { 383 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); 384 return Norm2AllModes::createInstance(NULL, (const char *)context, errorCode); 385 } 386 387 const char *name; 388}; 389 390STATIC_TRI_STATE_SINGLETON(nfcSingleton); 391STATIC_TRI_STATE_SINGLETON(nfkcSingleton); 392STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton); 393 394class Norm2Singleton : public SimpleSingletonWrapper<Normalizer2> { 395public: 396 Norm2Singleton(SimpleSingleton &s) : SimpleSingletonWrapper<Normalizer2>(s) {} 397 Normalizer2 *getInstance(UErrorCode &errorCode) { 398 return SimpleSingletonWrapper<Normalizer2>::getInstance(createInstance, NULL, errorCode); 399 } 400private: 401 static void *createInstance(const void *, UErrorCode &errorCode) { 402 Normalizer2 *noop=new NoopNormalizer2; 403 if(noop==NULL) { 404 errorCode=U_MEMORY_ALLOCATION_ERROR; 405 } 406 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); 407 return noop; 408 } 409}; 410 411STATIC_SIMPLE_SINGLETON(noopSingleton); 412 413static UHashtable *cache=NULL; 414 415U_CDECL_BEGIN 416 417static void U_CALLCONV deleteNorm2AllModes(void *allModes) { 418 delete (Norm2AllModes *)allModes; 419} 420 421static UBool U_CALLCONV uprv_normalizer2_cleanup() { 422 Norm2AllModesSingleton(nfcSingleton, NULL).deleteInstance(); 423 Norm2AllModesSingleton(nfkcSingleton, NULL).deleteInstance(); 424 Norm2AllModesSingleton(nfkc_cfSingleton, NULL).deleteInstance(); 425 Norm2Singleton(noopSingleton).deleteInstance(); 426 uhash_close(cache); 427 cache=NULL; 428 return TRUE; 429} 430 431U_CDECL_END 432 433const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) { 434 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 435 return allModes!=NULL ? &allModes->comp : NULL; 436} 437 438const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) { 439 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 440 return allModes!=NULL ? &allModes->decomp : NULL; 441} 442 443const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { 444 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 445 if(allModes!=NULL) { 446 allModes->impl.getFCDTrie(errorCode); 447 return &allModes->fcd; 448 } else { 449 return NULL; 450 } 451} 452 453const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { 454 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 455 return allModes!=NULL ? &allModes->fcc : NULL; 456} 457 458const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) { 459 Norm2AllModes *allModes= 460 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); 461 return allModes!=NULL ? &allModes->comp : NULL; 462} 463 464const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) { 465 Norm2AllModes *allModes= 466 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); 467 return allModes!=NULL ? &allModes->decomp : NULL; 468} 469 470const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) { 471 Norm2AllModes *allModes= 472 Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); 473 return allModes!=NULL ? &allModes->comp : NULL; 474} 475 476const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) { 477 return Norm2Singleton(noopSingleton).getInstance(errorCode); 478} 479 480const Normalizer2 * 481Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) { 482 if(U_FAILURE(errorCode)) { 483 return NULL; 484 } 485 switch(mode) { 486 case UNORM_NFD: 487 return getNFDInstance(errorCode); 488 case UNORM_NFKD: 489 return getNFKDInstance(errorCode); 490 case UNORM_NFC: 491 return getNFCInstance(errorCode); 492 case UNORM_NFKC: 493 return getNFKCInstance(errorCode); 494 case UNORM_FCD: 495 return getFCDInstance(errorCode); 496 default: // UNORM_NONE 497 return getNoopInstance(errorCode); 498 } 499} 500 501const Normalizer2Impl * 502Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { 503 Norm2AllModes *allModes= 504 Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 505 return allModes!=NULL ? &allModes->impl : NULL; 506} 507 508const Normalizer2Impl * 509Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) { 510 Norm2AllModes *allModes= 511 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); 512 return allModes!=NULL ? &allModes->impl : NULL; 513} 514 515const Normalizer2Impl * 516Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) { 517 Norm2AllModes *allModes= 518 Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); 519 return allModes!=NULL ? &allModes->impl : NULL; 520} 521 522const Normalizer2Impl * 523Normalizer2Factory::getImpl(const Normalizer2 *norm2) { 524 return &((Normalizer2WithImpl *)norm2)->impl; 525} 526 527const UTrie2 * 528Normalizer2Factory::getFCDTrie(UErrorCode &errorCode) { 529 Norm2AllModes *allModes= 530 Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 531 if(allModes!=NULL) { 532 return allModes->impl.getFCDTrie(errorCode); 533 } else { 534 return NULL; 535 } 536} 537 538const Normalizer2 * 539Normalizer2::getInstance(const char *packageName, 540 const char *name, 541 UNormalization2Mode mode, 542 UErrorCode &errorCode) { 543 if(U_FAILURE(errorCode)) { 544 return NULL; 545 } 546 if(name==NULL || *name==0) { 547 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 548 } 549 Norm2AllModes *allModes=NULL; 550 if(packageName==NULL) { 551 if(0==uprv_strcmp(name, "nfc")) { 552 allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 553 } else if(0==uprv_strcmp(name, "nfkc")) { 554 allModes=Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); 555 } else if(0==uprv_strcmp(name, "nfkc_cf")) { 556 allModes=Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); 557 } 558 } 559 if(allModes==NULL && U_SUCCESS(errorCode)) { 560 { 561 Mutex lock; 562 if(cache!=NULL) { 563 allModes=(Norm2AllModes *)uhash_get(cache, name); 564 } 565 } 566 if(allModes==NULL) { 567 LocalPointer<Norm2AllModes> localAllModes( 568 Norm2AllModes::createInstance(packageName, name, errorCode)); 569 if(U_SUCCESS(errorCode)) { 570 Mutex lock; 571 if(cache==NULL) { 572 cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode); 573 if(U_FAILURE(errorCode)) { 574 return NULL; 575 } 576 uhash_setKeyDeleter(cache, uprv_free); 577 uhash_setValueDeleter(cache, deleteNorm2AllModes); 578 } 579 void *temp=uhash_get(cache, name); 580 if(temp==NULL) { 581 int32_t keyLength=uprv_strlen(name)+1; 582 char *nameCopy=(char *)uprv_malloc(keyLength); 583 if(nameCopy==NULL) { 584 errorCode=U_MEMORY_ALLOCATION_ERROR; 585 return NULL; 586 } 587 uprv_memcpy(nameCopy, name, keyLength); 588 uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode); 589 } else { 590 // race condition 591 allModes=(Norm2AllModes *)temp; 592 } 593 } 594 } 595 } 596 if(allModes!=NULL && U_SUCCESS(errorCode)) { 597 switch(mode) { 598 case UNORM2_COMPOSE: 599 return &allModes->comp; 600 case UNORM2_DECOMPOSE: 601 return &allModes->decomp; 602 case UNORM2_FCD: 603 allModes->impl.getFCDTrie(errorCode); 604 return &allModes->fcd; 605 case UNORM2_COMPOSE_CONTIGUOUS: 606 return &allModes->fcc; 607 default: 608 break; // do nothing 609 } 610 } 611 return NULL; 612} 613 614UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(Normalizer2) 615 616U_NAMESPACE_END 617 618// C API ------------------------------------------------------------------- *** 619 620U_NAMESPACE_USE 621 622U_DRAFT const UNormalizer2 * U_EXPORT2 623unorm2_getInstance(const char *packageName, 624 const char *name, 625 UNormalization2Mode mode, 626 UErrorCode *pErrorCode) { 627 return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode); 628} 629 630U_DRAFT void U_EXPORT2 631unorm2_close(UNormalizer2 *norm2) { 632 delete (Normalizer2 *)norm2; 633} 634 635U_DRAFT int32_t U_EXPORT2 636unorm2_normalize(const UNormalizer2 *norm2, 637 const UChar *src, int32_t length, 638 UChar *dest, int32_t capacity, 639 UErrorCode *pErrorCode) { 640 if(U_FAILURE(*pErrorCode)) { 641 return 0; 642 } 643 if(src==NULL || length<-1 || capacity<0 || (dest==NULL && capacity>0) || src==dest) { 644 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 645 return 0; 646 } 647 UnicodeString destString(dest, 0, capacity); 648 const Normalizer2 *n2=(const Normalizer2 *)norm2; 649 if(n2->getDynamicClassID()==Normalizer2WithImpl::getStaticClassID()) { 650 // Avoid duplicate argument checking and support NUL-terminated src. 651 const Normalizer2WithImpl *n2wi=(const Normalizer2WithImpl *)n2; 652 ReorderingBuffer buffer(n2wi->impl, destString); 653 if(buffer.init(length, *pErrorCode)) { 654 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode); 655 } 656 } else { 657 UnicodeString srcString(length<0, src, length); 658 n2->normalize(srcString, destString, *pErrorCode); 659 } 660 return destString.extract(dest, capacity, *pErrorCode); 661} 662 663static int32_t 664normalizeSecondAndAppend(const UNormalizer2 *norm2, 665 UChar *first, int32_t firstLength, int32_t firstCapacity, 666 const UChar *second, int32_t secondLength, 667 UBool doNormalize, 668 UErrorCode *pErrorCode) { 669 if(U_FAILURE(*pErrorCode)) { 670 return 0; 671 } 672 if( second==NULL || secondLength<-1 || 673 firstCapacity<0 || (first==NULL && firstCapacity>0) || firstLength<-1 || 674 first==second 675 ) { 676 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 677 return 0; 678 } 679 UnicodeString firstString(first, firstLength, firstCapacity); 680 const Normalizer2 *n2=(const Normalizer2 *)norm2; 681 if(n2->getDynamicClassID()==Normalizer2WithImpl::getStaticClassID()) { 682 // Avoid duplicate argument checking and support NUL-terminated src. 683 const Normalizer2WithImpl *n2wi=(const Normalizer2WithImpl *)n2; 684 ReorderingBuffer buffer(n2wi->impl, firstString); 685 if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1 686 n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL, 687 doNormalize, buffer, *pErrorCode); 688 } 689 } else { 690 UnicodeString secondString(secondLength<0, second, secondLength); 691 if(doNormalize) { 692 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode); 693 } else { 694 n2->append(firstString, secondString, *pErrorCode); 695 } 696 } 697 return firstString.extract(first, firstCapacity, *pErrorCode); 698} 699 700U_DRAFT int32_t U_EXPORT2 701unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, 702 UChar *first, int32_t firstLength, int32_t firstCapacity, 703 const UChar *second, int32_t secondLength, 704 UErrorCode *pErrorCode) { 705 return normalizeSecondAndAppend(norm2, 706 first, firstLength, firstCapacity, 707 second, secondLength, 708 TRUE, pErrorCode); 709} 710 711U_DRAFT int32_t U_EXPORT2 712unorm2_append(const UNormalizer2 *norm2, 713 UChar *first, int32_t firstLength, int32_t firstCapacity, 714 const UChar *second, int32_t secondLength, 715 UErrorCode *pErrorCode) { 716 return normalizeSecondAndAppend(norm2, 717 first, firstLength, firstCapacity, 718 second, secondLength, 719 FALSE, pErrorCode); 720} 721 722U_DRAFT UBool U_EXPORT2 723unorm2_isNormalized(const UNormalizer2 *norm2, 724 const UChar *s, int32_t length, 725 UErrorCode *pErrorCode) { 726 if(U_FAILURE(*pErrorCode)) { 727 return 0; 728 } 729 if(s==NULL || length<-1) { 730 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 731 return 0; 732 } 733 UnicodeString sString(length<0, s, length); 734 return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode); 735} 736 737U_DRAFT UNormalizationCheckResult U_EXPORT2 738unorm2_quickCheck(const UNormalizer2 *norm2, 739 const UChar *s, int32_t length, 740 UErrorCode *pErrorCode) { 741 if(U_FAILURE(*pErrorCode)) { 742 return UNORM_NO; 743 } 744 if(s==NULL || length<-1) { 745 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 746 return UNORM_NO; 747 } 748 UnicodeString sString(length<0, s, length); 749 return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode); 750} 751 752U_DRAFT int32_t U_EXPORT2 753unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, 754 const UChar *s, int32_t length, 755 UErrorCode *pErrorCode) { 756 if(U_FAILURE(*pErrorCode)) { 757 return 0; 758 } 759 if(s==NULL || length<-1) { 760 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 761 return 0; 762 } 763 UnicodeString sString(length<0, s, length); 764 return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode); 765} 766 767U_DRAFT UBool U_EXPORT2 768unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) { 769 return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c); 770} 771 772U_DRAFT UBool U_EXPORT2 773unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) { 774 return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c); 775} 776 777U_DRAFT UBool U_EXPORT2 778unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) { 779 return ((const Normalizer2 *)norm2)->isInert(c); 780} 781 782// Some properties APIs ---------------------------------------------------- *** 783 784U_CFUNC UNormalizationCheckResult U_EXPORT2 785unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { 786 if(mode<=UNORM_NONE || UNORM_FCD<=mode) { 787 return UNORM_YES; 788 } 789 UErrorCode errorCode=U_ZERO_ERROR; 790 const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode); 791 if(U_SUCCESS(errorCode)) { 792 return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c); 793 } else { 794 return UNORM_MAYBE; 795 } 796} 797 798U_CAPI const uint16_t * U_EXPORT2 799unorm_getFCDTrieIndex(UChar32 &fcdHighStart, UErrorCode *pErrorCode) { 800 const UTrie2 *trie=Normalizer2Factory::getFCDTrie(*pErrorCode); 801 if(U_SUCCESS(*pErrorCode)) { 802 fcdHighStart=trie->highStart; 803 return trie->index; 804 } else { 805 return NULL; 806 } 807} 808 809#endif // !UCONFIG_NO_NORMALIZATION 810