1/* 2******************************************************************************* 3* 4* Copyright (C) 2009-2013, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7******************************************************************************* 8* file name: normalizer2.cpp 9* encoding: US-ASCII 10* tab size: 8 (not used) 11* indentation:4 12* 13* created on: 2009nov22 14* created by: Markus W. Scherer 15*/ 16 17#include "unicode/utypes.h" 18 19#if !UCONFIG_NO_NORMALIZATION 20 21#include "unicode/localpointer.h" 22#include "unicode/normalizer2.h" 23#include "unicode/unistr.h" 24#include "unicode/unorm.h" 25#include "cpputils.h" 26#include "cstring.h" 27#include "mutex.h" 28#include "normalizer2impl.h" 29#include "uassert.h" 30#include "ucln_cmn.h" 31#include "uhash.h" 32 33U_NAMESPACE_BEGIN 34 35// Public API dispatch via Normalizer2 subclasses -------------------------- *** 36 37Normalizer2::~Normalizer2() {} 38 39UBool 40Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const { 41 return FALSE; 42} 43 44UChar32 45Normalizer2::composePair(UChar32, UChar32) const { 46 return U_SENTINEL; 47} 48 49uint8_t 50Normalizer2::getCombiningClass(UChar32 /*c*/) const { 51 return 0; 52} 53 54// Normalizer2 implementation for the old UNORM_NONE. 55class NoopNormalizer2 : public Normalizer2 { 56 virtual ~NoopNormalizer2(); 57 58 virtual UnicodeString & 59 normalize(const UnicodeString &src, 60 UnicodeString &dest, 61 UErrorCode &errorCode) const { 62 if(U_SUCCESS(errorCode)) { 63 if(&dest!=&src) { 64 dest=src; 65 } else { 66 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 67 } 68 } 69 return dest; 70 } 71 virtual UnicodeString & 72 normalizeSecondAndAppend(UnicodeString &first, 73 const UnicodeString &second, 74 UErrorCode &errorCode) const { 75 if(U_SUCCESS(errorCode)) { 76 if(&first!=&second) { 77 first.append(second); 78 } else { 79 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 80 } 81 } 82 return first; 83 } 84 virtual UnicodeString & 85 append(UnicodeString &first, 86 const UnicodeString &second, 87 UErrorCode &errorCode) const { 88 if(U_SUCCESS(errorCode)) { 89 if(&first!=&second) { 90 first.append(second); 91 } else { 92 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 93 } 94 } 95 return first; 96 } 97 virtual UBool 98 getDecomposition(UChar32, UnicodeString &) const { 99 return FALSE; 100 } 101 // No need to override the default getRawDecomposition(). 102 virtual UBool 103 isNormalized(const UnicodeString &, UErrorCode &) const { 104 return TRUE; 105 } 106 virtual UNormalizationCheckResult 107 quickCheck(const UnicodeString &, UErrorCode &) const { 108 return UNORM_YES; 109 } 110 virtual int32_t 111 spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const { 112 return s.length(); 113 } 114 virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; } 115 virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; } 116 virtual UBool isInert(UChar32) const { return TRUE; } 117}; 118 119NoopNormalizer2::~NoopNormalizer2() {} 120 121// Intermediate class: 122// Has Normalizer2Impl and does boilerplate argument checking and setup. 123class Normalizer2WithImpl : public Normalizer2 { 124public: 125 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} 126 virtual ~Normalizer2WithImpl(); 127 128 // normalize 129 virtual UnicodeString & 130 normalize(const UnicodeString &src, 131 UnicodeString &dest, 132 UErrorCode &errorCode) const { 133 if(U_FAILURE(errorCode)) { 134 dest.setToBogus(); 135 return dest; 136 } 137 const UChar *sArray=src.getBuffer(); 138 if(&dest==&src || sArray==NULL) { 139 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 140 dest.setToBogus(); 141 return dest; 142 } 143 dest.remove(); 144 ReorderingBuffer buffer(impl, dest); 145 if(buffer.init(src.length(), errorCode)) { 146 normalize(sArray, sArray+src.length(), buffer, errorCode); 147 } 148 return dest; 149 } 150 virtual void 151 normalize(const UChar *src, const UChar *limit, 152 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; 153 154 // normalize and append 155 virtual UnicodeString & 156 normalizeSecondAndAppend(UnicodeString &first, 157 const UnicodeString &second, 158 UErrorCode &errorCode) const { 159 return normalizeSecondAndAppend(first, second, TRUE, errorCode); 160 } 161 virtual UnicodeString & 162 append(UnicodeString &first, 163 const UnicodeString &second, 164 UErrorCode &errorCode) const { 165 return normalizeSecondAndAppend(first, second, FALSE, errorCode); 166 } 167 UnicodeString & 168 normalizeSecondAndAppend(UnicodeString &first, 169 const UnicodeString &second, 170 UBool doNormalize, 171 UErrorCode &errorCode) const { 172 uprv_checkCanGetBuffer(first, errorCode); 173 if(U_FAILURE(errorCode)) { 174 return first; 175 } 176 const UChar *secondArray=second.getBuffer(); 177 if(&first==&second || secondArray==NULL) { 178 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 179 return first; 180 } 181 int32_t firstLength=first.length(); 182 UnicodeString safeMiddle; 183 { 184 ReorderingBuffer buffer(impl, first); 185 if(buffer.init(firstLength+second.length(), errorCode)) { 186 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, 187 safeMiddle, buffer, errorCode); 188 } 189 } // The ReorderingBuffer destructor finalizes the first string. 190 if(U_FAILURE(errorCode)) { 191 // Restore the modified suffix of the first string. 192 first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle); 193 } 194 return first; 195 } 196 virtual void 197 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 198 UnicodeString &safeMiddle, 199 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; 200 virtual UBool 201 getDecomposition(UChar32 c, UnicodeString &decomposition) const { 202 UChar buffer[4]; 203 int32_t length; 204 const UChar *d=impl.getDecomposition(c, buffer, length); 205 if(d==NULL) { 206 return FALSE; 207 } 208 if(d==buffer) { 209 decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c) 210 } else { 211 decomposition.setTo(FALSE, d, length); // read-only alias 212 } 213 return TRUE; 214 } 215 virtual UBool 216 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const { 217 UChar buffer[30]; 218 int32_t length; 219 const UChar *d=impl.getRawDecomposition(c, buffer, length); 220 if(d==NULL) { 221 return FALSE; 222 } 223 if(d==buffer) { 224 decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition) 225 } else { 226 decomposition.setTo(FALSE, d, length); // read-only alias 227 } 228 return TRUE; 229 } 230 virtual UChar32 231 composePair(UChar32 a, UChar32 b) const { 232 return impl.composePair(a, b); 233 } 234 235 virtual uint8_t 236 getCombiningClass(UChar32 c) const { 237 return impl.getCC(impl.getNorm16(c)); 238 } 239 240 // quick checks 241 virtual UBool 242 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { 243 if(U_FAILURE(errorCode)) { 244 return FALSE; 245 } 246 const UChar *sArray=s.getBuffer(); 247 if(sArray==NULL) { 248 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 249 return FALSE; 250 } 251 const UChar *sLimit=sArray+s.length(); 252 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); 253 } 254 virtual UNormalizationCheckResult 255 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { 256 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; 257 } 258 virtual int32_t 259 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { 260 if(U_FAILURE(errorCode)) { 261 return 0; 262 } 263 const UChar *sArray=s.getBuffer(); 264 if(sArray==NULL) { 265 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 266 return 0; 267 } 268 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); 269 } 270 virtual const UChar * 271 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0; 272 273 virtual UNormalizationCheckResult getQuickCheck(UChar32) const { 274 return UNORM_YES; 275 } 276 277 const Normalizer2Impl &impl; 278}; 279 280Normalizer2WithImpl::~Normalizer2WithImpl() {} 281 282class DecomposeNormalizer2 : public Normalizer2WithImpl { 283public: 284 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} 285 virtual ~DecomposeNormalizer2(); 286 287private: 288 virtual void 289 normalize(const UChar *src, const UChar *limit, 290 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 291 impl.decompose(src, limit, &buffer, errorCode); 292 } 293 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 294 virtual void 295 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 296 UnicodeString &safeMiddle, 297 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 298 impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); 299 } 300 virtual const UChar * 301 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { 302 return impl.decompose(src, limit, NULL, errorCode); 303 } 304 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 305 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { 306 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; 307 } 308 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); } 309 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); } 310 virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } 311}; 312 313DecomposeNormalizer2::~DecomposeNormalizer2() {} 314 315class ComposeNormalizer2 : public Normalizer2WithImpl { 316public: 317 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : 318 Normalizer2WithImpl(ni), onlyContiguous(fcc) {} 319 virtual ~ComposeNormalizer2(); 320 321private: 322 virtual void 323 normalize(const UChar *src, const UChar *limit, 324 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 325 impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); 326 } 327 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 328 virtual void 329 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 330 UnicodeString &safeMiddle, 331 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 332 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode); 333 } 334 335 virtual UBool 336 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { 337 if(U_FAILURE(errorCode)) { 338 return FALSE; 339 } 340 const UChar *sArray=s.getBuffer(); 341 if(sArray==NULL) { 342 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 343 return FALSE; 344 } 345 UnicodeString temp; 346 ReorderingBuffer buffer(impl, temp); 347 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization 348 return FALSE; 349 } 350 return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode); 351 } 352 virtual UNormalizationCheckResult 353 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { 354 if(U_FAILURE(errorCode)) { 355 return UNORM_MAYBE; 356 } 357 const UChar *sArray=s.getBuffer(); 358 if(sArray==NULL) { 359 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 360 return UNORM_MAYBE; 361 } 362 UNormalizationCheckResult qcResult=UNORM_YES; 363 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult); 364 return qcResult; 365 } 366 virtual const UChar * 367 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const { 368 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); 369 } 370 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 371 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { 372 return impl.getCompQuickCheck(impl.getNorm16(c)); 373 } 374 virtual UBool hasBoundaryBefore(UChar32 c) const { 375 return impl.hasCompBoundaryBefore(c); 376 } 377 virtual UBool hasBoundaryAfter(UChar32 c) const { 378 return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE); 379 } 380 virtual UBool isInert(UChar32 c) const { 381 return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE); 382 } 383 384 const UBool onlyContiguous; 385}; 386 387ComposeNormalizer2::~ComposeNormalizer2() {} 388 389class FCDNormalizer2 : public Normalizer2WithImpl { 390public: 391 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} 392 virtual ~FCDNormalizer2(); 393 394private: 395 virtual void 396 normalize(const UChar *src, const UChar *limit, 397 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 398 impl.makeFCD(src, limit, &buffer, errorCode); 399 } 400 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 401 virtual void 402 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 403 UnicodeString &safeMiddle, 404 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 405 impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); 406 } 407 virtual const UChar * 408 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { 409 return impl.makeFCD(src, limit, NULL, errorCode); 410 } 411 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 412 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); } 413 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); } 414 virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } 415}; 416 417FCDNormalizer2::~FCDNormalizer2() {} 418 419// instance cache ---------------------------------------------------------- *** 420 421struct Norm2AllModes : public UMemory { 422 static Norm2AllModes *createInstance(const char *packageName, 423 const char *name, 424 UErrorCode &errorCode); 425 Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {} 426 427 Normalizer2Impl impl; 428 ComposeNormalizer2 comp; 429 DecomposeNormalizer2 decomp; 430 FCDNormalizer2 fcd; 431 ComposeNormalizer2 fcc; 432}; 433 434Norm2AllModes * 435Norm2AllModes::createInstance(const char *packageName, 436 const char *name, 437 UErrorCode &errorCode) { 438 if(U_FAILURE(errorCode)) { 439 return NULL; 440 } 441 LocalPointer<Norm2AllModes> allModes(new Norm2AllModes); 442 if(allModes.isNull()) { 443 errorCode=U_MEMORY_ALLOCATION_ERROR; 444 return NULL; 445 } 446 allModes->impl.load(packageName, name, errorCode); 447 return U_SUCCESS(errorCode) ? allModes.orphan() : NULL; 448} 449 450U_CDECL_BEGIN 451static UBool U_CALLCONV uprv_normalizer2_cleanup(); 452U_CDECL_END 453 454 455static Norm2AllModes *nfcSingleton; 456static Norm2AllModes *nfkcSingleton; 457static Norm2AllModes *nfkc_cfSingleton; 458static Normalizer2 *noopSingleton; 459static UHashtable *cache=NULL; 460 461static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER; 462static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER; 463static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER; 464static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER; 465 466// UInitOnce singleton initialization function 467static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) { 468 if (uprv_strcmp(what, "nfc") == 0) { 469 nfcSingleton = Norm2AllModes::createInstance(NULL, "nfc", errorCode); 470 } else if (uprv_strcmp(what, "nfkc") == 0) { 471 nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode); 472 } else if (uprv_strcmp(what, "nfkc_cf") == 0) { 473 nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode); 474 } else if (uprv_strcmp(what, "noop") == 0) { 475 noopSingleton = new NoopNormalizer2; 476 } else { 477 U_ASSERT(FALSE); // Unknown singleton 478 } 479 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); 480} 481 482U_CDECL_BEGIN 483 484static void U_CALLCONV deleteNorm2AllModes(void *allModes) { 485 delete (Norm2AllModes *)allModes; 486} 487 488static UBool U_CALLCONV uprv_normalizer2_cleanup() { 489 delete nfcSingleton; 490 nfcSingleton = NULL; 491 delete nfkcSingleton; 492 nfkcSingleton = NULL; 493 delete nfkc_cfSingleton; 494 nfkc_cfSingleton = NULL; 495 delete noopSingleton; 496 noopSingleton = NULL; 497 uhash_close(cache); 498 cache=NULL; 499 nfcInitOnce.reset(); 500 nfkcInitOnce.reset(); 501 nfkc_cfInitOnce.reset(); 502 noopInitOnce.reset(); 503 return TRUE; 504} 505 506U_CDECL_END 507 508const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) { 509 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); 510 return nfcSingleton!=NULL ? &nfcSingleton->comp : NULL; 511} 512 513const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) { 514 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); 515 return nfcSingleton!=NULL ? &nfcSingleton->decomp : NULL; 516} 517 518const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { 519 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); 520 return nfcSingleton!=NULL ? &nfcSingleton->fcd : NULL; 521} 522 523const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { 524 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); 525 return nfcSingleton!=NULL ? &nfcSingleton->fcc : NULL; 526} 527 528const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) { 529 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); 530 return nfkcSingleton!=NULL ? &nfkcSingleton->comp : NULL; 531} 532 533const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) { 534 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); 535 return nfkcSingleton!=NULL ? &nfkcSingleton->decomp : NULL; 536} 537 538const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) { 539 umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode); 540 return nfkc_cfSingleton!=NULL ? &nfkc_cfSingleton->comp : NULL; 541} 542 543const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) { 544 umtx_initOnce(noopInitOnce, &initSingletons, "noop", errorCode); 545 return noopSingleton; 546} 547 548const Normalizer2 * 549Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) { 550 if(U_FAILURE(errorCode)) { 551 return NULL; 552 } 553 switch(mode) { 554 case UNORM_NFD: 555 return getNFDInstance(errorCode); 556 case UNORM_NFKD: 557 return getNFKDInstance(errorCode); 558 case UNORM_NFC: 559 return getNFCInstance(errorCode); 560 case UNORM_NFKC: 561 return getNFKCInstance(errorCode); 562 case UNORM_FCD: 563 return getFCDInstance(errorCode); 564 default: // UNORM_NONE 565 return getNoopInstance(errorCode); 566 } 567} 568 569const Normalizer2Impl * 570Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { 571 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); 572 return nfcSingleton!=NULL ? &nfcSingleton->impl : NULL; 573} 574 575const Normalizer2Impl * 576Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) { 577 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); 578 return nfkcSingleton!=NULL ? &nfkcSingleton->impl : NULL; 579} 580 581const Normalizer2Impl * 582Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) { 583 umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode); 584 return nfkc_cfSingleton!=NULL ? &nfkc_cfSingleton->impl : NULL; 585} 586 587const Normalizer2Impl * 588Normalizer2Factory::getImpl(const Normalizer2 *norm2) { 589 return &((Normalizer2WithImpl *)norm2)->impl; 590} 591 592const Normalizer2 * 593Normalizer2::getNFCInstance(UErrorCode &errorCode) { 594 return Normalizer2Factory::getNFCInstance(errorCode); 595} 596 597const Normalizer2 * 598Normalizer2::getNFDInstance(UErrorCode &errorCode) { 599 return Normalizer2Factory::getNFDInstance(errorCode); 600} 601 602const Normalizer2 * 603Normalizer2::getNFKCInstance(UErrorCode &errorCode) { 604 return Normalizer2Factory::getNFKCInstance(errorCode); 605} 606 607const Normalizer2 * 608Normalizer2::getNFKDInstance(UErrorCode &errorCode) { 609 return Normalizer2Factory::getNFKDInstance(errorCode); 610} 611 612const Normalizer2 * 613Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) { 614 return Normalizer2Factory::getNFKC_CFInstance(errorCode); 615} 616 617const Normalizer2 * 618Normalizer2::getInstance(const char *packageName, 619 const char *name, 620 UNormalization2Mode mode, 621 UErrorCode &errorCode) { 622 if(U_FAILURE(errorCode)) { 623 return NULL; 624 } 625 if(name==NULL || *name==0) { 626 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 627 return NULL; 628 } 629 Norm2AllModes *allModes=NULL; 630 if(packageName==NULL) { 631 if(0==uprv_strcmp(name, "nfc")) { 632 umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); 633 allModes=nfcSingleton; 634 } else if(0==uprv_strcmp(name, "nfkc")) { 635 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); 636 allModes=nfkcSingleton; 637 } else if(0==uprv_strcmp(name, "nfkc_cf")) { 638 umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode); 639 allModes=nfkc_cfSingleton; 640 } 641 } 642 if(allModes==NULL && U_SUCCESS(errorCode)) { 643 { 644 Mutex lock; 645 if(cache!=NULL) { 646 allModes=(Norm2AllModes *)uhash_get(cache, name); 647 } 648 } 649 if(allModes==NULL) { 650 LocalPointer<Norm2AllModes> localAllModes( 651 Norm2AllModes::createInstance(packageName, name, errorCode)); 652 if(U_SUCCESS(errorCode)) { 653 Mutex lock; 654 if(cache==NULL) { 655 cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode); 656 if(U_FAILURE(errorCode)) { 657 return NULL; 658 } 659 uhash_setKeyDeleter(cache, uprv_free); 660 uhash_setValueDeleter(cache, deleteNorm2AllModes); 661 } 662 void *temp=uhash_get(cache, name); 663 if(temp==NULL) { 664 int32_t keyLength=uprv_strlen(name)+1; 665 char *nameCopy=(char *)uprv_malloc(keyLength); 666 if(nameCopy==NULL) { 667 errorCode=U_MEMORY_ALLOCATION_ERROR; 668 return NULL; 669 } 670 uprv_memcpy(nameCopy, name, keyLength); 671 uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode); 672 } else { 673 // race condition 674 allModes=(Norm2AllModes *)temp; 675 } 676 } 677 } 678 } 679 if(allModes!=NULL && U_SUCCESS(errorCode)) { 680 switch(mode) { 681 case UNORM2_COMPOSE: 682 return &allModes->comp; 683 case UNORM2_DECOMPOSE: 684 return &allModes->decomp; 685 case UNORM2_FCD: 686 return &allModes->fcd; 687 case UNORM2_COMPOSE_CONTIGUOUS: 688 return &allModes->fcc; 689 default: 690 break; // do nothing 691 } 692 } 693 return NULL; 694} 695 696U_NAMESPACE_END 697 698// C API ------------------------------------------------------------------- *** 699 700U_NAMESPACE_USE 701 702U_CAPI const UNormalizer2 * U_EXPORT2 703unorm2_getNFCInstance(UErrorCode *pErrorCode) { 704 return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode); 705} 706 707U_CAPI const UNormalizer2 * U_EXPORT2 708unorm2_getNFDInstance(UErrorCode *pErrorCode) { 709 return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode); 710} 711 712U_CAPI const UNormalizer2 * U_EXPORT2 713unorm2_getNFKCInstance(UErrorCode *pErrorCode) { 714 return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode); 715} 716 717U_CAPI const UNormalizer2 * U_EXPORT2 718unorm2_getNFKDInstance(UErrorCode *pErrorCode) { 719 return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode); 720} 721 722U_CAPI const UNormalizer2 * U_EXPORT2 723unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) { 724 return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode); 725} 726 727U_CAPI const UNormalizer2 * U_EXPORT2 728unorm2_getInstance(const char *packageName, 729 const char *name, 730 UNormalization2Mode mode, 731 UErrorCode *pErrorCode) { 732 return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode); 733} 734 735U_CAPI void U_EXPORT2 736unorm2_close(UNormalizer2 *norm2) { 737 delete (Normalizer2 *)norm2; 738} 739 740U_CAPI int32_t U_EXPORT2 741unorm2_normalize(const UNormalizer2 *norm2, 742 const UChar *src, int32_t length, 743 UChar *dest, int32_t capacity, 744 UErrorCode *pErrorCode) { 745 if(U_FAILURE(*pErrorCode)) { 746 return 0; 747 } 748 if( (src==NULL ? length!=0 : length<-1) || 749 (dest==NULL ? capacity!=0 : capacity<0) || 750 (src==dest && src!=NULL) 751 ) { 752 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 753 return 0; 754 } 755 UnicodeString destString(dest, 0, capacity); 756 // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash. 757 if(length!=0) { 758 const Normalizer2 *n2=(const Normalizer2 *)norm2; 759 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2); 760 if(n2wi!=NULL) { 761 // Avoid duplicate argument checking and support NUL-terminated src. 762 ReorderingBuffer buffer(n2wi->impl, destString); 763 if(buffer.init(length, *pErrorCode)) { 764 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode); 765 } 766 } else { 767 UnicodeString srcString(length<0, src, length); 768 n2->normalize(srcString, destString, *pErrorCode); 769 } 770 } 771 return destString.extract(dest, capacity, *pErrorCode); 772} 773 774static int32_t 775normalizeSecondAndAppend(const UNormalizer2 *norm2, 776 UChar *first, int32_t firstLength, int32_t firstCapacity, 777 const UChar *second, int32_t secondLength, 778 UBool doNormalize, 779 UErrorCode *pErrorCode) { 780 if(U_FAILURE(*pErrorCode)) { 781 return 0; 782 } 783 if( (second==NULL ? secondLength!=0 : secondLength<-1) || 784 (first==NULL ? (firstCapacity!=0 || firstLength!=0) : 785 (firstCapacity<0 || firstLength<-1)) || 786 (first==second && first!=NULL) 787 ) { 788 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 789 return 0; 790 } 791 UnicodeString firstString(first, firstLength, firstCapacity); 792 firstLength=firstString.length(); // In case it was -1. 793 // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash. 794 if(secondLength!=0) { 795 const Normalizer2 *n2=(const Normalizer2 *)norm2; 796 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2); 797 if(n2wi!=NULL) { 798 // Avoid duplicate argument checking and support NUL-terminated src. 799 UnicodeString safeMiddle; 800 { 801 ReorderingBuffer buffer(n2wi->impl, firstString); 802 if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1 803 n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL, 804 doNormalize, safeMiddle, buffer, *pErrorCode); 805 } 806 } // The ReorderingBuffer destructor finalizes firstString. 807 if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) { 808 // Restore the modified suffix of the first string. 809 // This does not restore first[] array contents between firstLength and firstCapacity. 810 // (That might be uninitialized memory, as far as we know.) 811 if(first!=NULL) { /* don't dereference NULL */ 812 safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length()); 813 if(firstLength<firstCapacity) { 814 first[firstLength]=0; // NUL-terminate in case it was originally. 815 } 816 } 817 } 818 } else { 819 UnicodeString secondString(secondLength<0, second, secondLength); 820 if(doNormalize) { 821 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode); 822 } else { 823 n2->append(firstString, secondString, *pErrorCode); 824 } 825 } 826 } 827 return firstString.extract(first, firstCapacity, *pErrorCode); 828} 829 830U_CAPI int32_t U_EXPORT2 831unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, 832 UChar *first, int32_t firstLength, int32_t firstCapacity, 833 const UChar *second, int32_t secondLength, 834 UErrorCode *pErrorCode) { 835 return normalizeSecondAndAppend(norm2, 836 first, firstLength, firstCapacity, 837 second, secondLength, 838 TRUE, pErrorCode); 839} 840 841U_CAPI int32_t U_EXPORT2 842unorm2_append(const UNormalizer2 *norm2, 843 UChar *first, int32_t firstLength, int32_t firstCapacity, 844 const UChar *second, int32_t secondLength, 845 UErrorCode *pErrorCode) { 846 return normalizeSecondAndAppend(norm2, 847 first, firstLength, firstCapacity, 848 second, secondLength, 849 FALSE, pErrorCode); 850} 851 852U_CAPI int32_t U_EXPORT2 853unorm2_getDecomposition(const UNormalizer2 *norm2, 854 UChar32 c, UChar *decomposition, int32_t capacity, 855 UErrorCode *pErrorCode) { 856 if(U_FAILURE(*pErrorCode)) { 857 return 0; 858 } 859 if(decomposition==NULL ? capacity!=0 : capacity<0) { 860 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 861 return 0; 862 } 863 UnicodeString destString(decomposition, 0, capacity); 864 if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) { 865 return destString.extract(decomposition, capacity, *pErrorCode); 866 } else { 867 return -1; 868 } 869} 870 871U_CAPI int32_t U_EXPORT2 872unorm2_getRawDecomposition(const UNormalizer2 *norm2, 873 UChar32 c, UChar *decomposition, int32_t capacity, 874 UErrorCode *pErrorCode) { 875 if(U_FAILURE(*pErrorCode)) { 876 return 0; 877 } 878 if(decomposition==NULL ? capacity!=0 : capacity<0) { 879 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 880 return 0; 881 } 882 UnicodeString destString(decomposition, 0, capacity); 883 if(reinterpret_cast<const Normalizer2 *>(norm2)->getRawDecomposition(c, destString)) { 884 return destString.extract(decomposition, capacity, *pErrorCode); 885 } else { 886 return -1; 887 } 888} 889 890U_CAPI UChar32 U_EXPORT2 891unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) { 892 return reinterpret_cast<const Normalizer2 *>(norm2)->composePair(a, b); 893} 894 895U_CAPI uint8_t U_EXPORT2 896unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) { 897 return reinterpret_cast<const Normalizer2 *>(norm2)->getCombiningClass(c); 898} 899 900U_CAPI UBool U_EXPORT2 901unorm2_isNormalized(const UNormalizer2 *norm2, 902 const UChar *s, int32_t length, 903 UErrorCode *pErrorCode) { 904 if(U_FAILURE(*pErrorCode)) { 905 return 0; 906 } 907 if((s==NULL && length!=0) || length<-1) { 908 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 909 return 0; 910 } 911 UnicodeString sString(length<0, s, length); 912 return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode); 913} 914 915U_CAPI UNormalizationCheckResult U_EXPORT2 916unorm2_quickCheck(const UNormalizer2 *norm2, 917 const UChar *s, int32_t length, 918 UErrorCode *pErrorCode) { 919 if(U_FAILURE(*pErrorCode)) { 920 return UNORM_NO; 921 } 922 if((s==NULL && length!=0) || length<-1) { 923 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 924 return UNORM_NO; 925 } 926 UnicodeString sString(length<0, s, length); 927 return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode); 928} 929 930U_CAPI int32_t U_EXPORT2 931unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, 932 const UChar *s, int32_t length, 933 UErrorCode *pErrorCode) { 934 if(U_FAILURE(*pErrorCode)) { 935 return 0; 936 } 937 if((s==NULL && length!=0) || length<-1) { 938 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 939 return 0; 940 } 941 UnicodeString sString(length<0, s, length); 942 return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode); 943} 944 945U_CAPI UBool U_EXPORT2 946unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) { 947 return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c); 948} 949 950U_CAPI UBool U_EXPORT2 951unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) { 952 return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c); 953} 954 955U_CAPI UBool U_EXPORT2 956unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) { 957 return ((const Normalizer2 *)norm2)->isInert(c); 958} 959 960// Some properties APIs ---------------------------------------------------- *** 961 962U_CAPI uint8_t U_EXPORT2 963u_getCombiningClass(UChar32 c) { 964 UErrorCode errorCode=U_ZERO_ERROR; 965 const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode); 966 if(U_SUCCESS(errorCode)) { 967 return nfd->getCombiningClass(c); 968 } else { 969 return 0; 970 } 971} 972 973U_CFUNC UNormalizationCheckResult 974unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { 975 if(mode<=UNORM_NONE || UNORM_FCD<=mode) { 976 return UNORM_YES; 977 } 978 UErrorCode errorCode=U_ZERO_ERROR; 979 const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode); 980 if(U_SUCCESS(errorCode)) { 981 return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c); 982 } else { 983 return UNORM_MAYBE; 984 } 985} 986 987U_CFUNC uint16_t 988unorm_getFCD16(UChar32 c) { 989 UErrorCode errorCode=U_ZERO_ERROR; 990 const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); 991 if(U_SUCCESS(errorCode)) { 992 return impl->getFCD16(c); 993 } else { 994 return 0; 995 } 996} 997 998#endif // !UCONFIG_NO_NORMALIZATION 999