1/* 2******************************************************************************* 3* 4* Copyright (C) 2009-2012, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7******************************************************************************* 8* file name: normalizer2.cpp 9* encoding: US-ASCII 10* tab size: 8 (not used) 11* indentation:4 12* 13* created on: 2009nov22 14* created by: Markus W. Scherer 15*/ 16 17#include "unicode/utypes.h" 18 19#if !UCONFIG_NO_NORMALIZATION 20 21#include "unicode/localpointer.h" 22#include "unicode/normalizer2.h" 23#include "unicode/unistr.h" 24#include "unicode/unorm.h" 25#include "cpputils.h" 26#include "cstring.h" 27#include "mutex.h" 28#include "normalizer2impl.h" 29#include "ucln_cmn.h" 30#include "uhash.h" 31 32U_NAMESPACE_BEGIN 33 34// Public API dispatch via Normalizer2 subclasses -------------------------- *** 35 36Normalizer2::~Normalizer2() {} 37 38UBool 39Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const { 40 return FALSE; 41} 42 43UChar32 44Normalizer2::composePair(UChar32, UChar32) const { 45 return U_SENTINEL; 46} 47 48uint8_t 49Normalizer2::getCombiningClass(UChar32 /*c*/) const { 50 return 0; 51} 52 53UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Normalizer2) 54 55// Normalizer2 implementation for the old UNORM_NONE. 56class NoopNormalizer2 : public Normalizer2 { 57 virtual ~NoopNormalizer2(); 58 59 virtual UnicodeString & 60 normalize(const UnicodeString &src, 61 UnicodeString &dest, 62 UErrorCode &errorCode) const { 63 if(U_SUCCESS(errorCode)) { 64 if(&dest!=&src) { 65 dest=src; 66 } else { 67 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 68 } 69 } 70 return dest; 71 } 72 virtual UnicodeString & 73 normalizeSecondAndAppend(UnicodeString &first, 74 const UnicodeString &second, 75 UErrorCode &errorCode) const { 76 if(U_SUCCESS(errorCode)) { 77 if(&first!=&second) { 78 first.append(second); 79 } else { 80 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 81 } 82 } 83 return first; 84 } 85 virtual UnicodeString & 86 append(UnicodeString &first, 87 const UnicodeString &second, 88 UErrorCode &errorCode) const { 89 if(U_SUCCESS(errorCode)) { 90 if(&first!=&second) { 91 first.append(second); 92 } else { 93 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 94 } 95 } 96 return first; 97 } 98 virtual UBool 99 getDecomposition(UChar32, UnicodeString &) const { 100 return FALSE; 101 } 102 // No need to override the default getRawDecomposition(). 103 virtual UBool 104 isNormalized(const UnicodeString &, UErrorCode &) const { 105 return TRUE; 106 } 107 virtual UNormalizationCheckResult 108 quickCheck(const UnicodeString &, UErrorCode &) const { 109 return UNORM_YES; 110 } 111 virtual int32_t 112 spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const { 113 return s.length(); 114 } 115 virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; } 116 virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; } 117 virtual UBool isInert(UChar32) const { return TRUE; } 118}; 119 120NoopNormalizer2::~NoopNormalizer2() {} 121 122// Intermediate class: 123// Has Normalizer2Impl and does boilerplate argument checking and setup. 124class Normalizer2WithImpl : public Normalizer2 { 125public: 126 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} 127 virtual ~Normalizer2WithImpl(); 128 129 // normalize 130 virtual UnicodeString & 131 normalize(const UnicodeString &src, 132 UnicodeString &dest, 133 UErrorCode &errorCode) const { 134 if(U_FAILURE(errorCode)) { 135 dest.setToBogus(); 136 return dest; 137 } 138 const UChar *sArray=src.getBuffer(); 139 if(&dest==&src || sArray==NULL) { 140 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 141 dest.setToBogus(); 142 return dest; 143 } 144 dest.remove(); 145 ReorderingBuffer buffer(impl, dest); 146 if(buffer.init(src.length(), errorCode)) { 147 normalize(sArray, sArray+src.length(), buffer, errorCode); 148 } 149 return dest; 150 } 151 virtual void 152 normalize(const UChar *src, const UChar *limit, 153 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; 154 155 // normalize and append 156 virtual UnicodeString & 157 normalizeSecondAndAppend(UnicodeString &first, 158 const UnicodeString &second, 159 UErrorCode &errorCode) const { 160 return normalizeSecondAndAppend(first, second, TRUE, errorCode); 161 } 162 virtual UnicodeString & 163 append(UnicodeString &first, 164 const UnicodeString &second, 165 UErrorCode &errorCode) const { 166 return normalizeSecondAndAppend(first, second, FALSE, errorCode); 167 } 168 UnicodeString & 169 normalizeSecondAndAppend(UnicodeString &first, 170 const UnicodeString &second, 171 UBool doNormalize, 172 UErrorCode &errorCode) const { 173 uprv_checkCanGetBuffer(first, errorCode); 174 if(U_FAILURE(errorCode)) { 175 return first; 176 } 177 const UChar *secondArray=second.getBuffer(); 178 if(&first==&second || secondArray==NULL) { 179 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 180 return first; 181 } 182 int32_t firstLength=first.length(); 183 UnicodeString safeMiddle; 184 { 185 ReorderingBuffer buffer(impl, first); 186 if(buffer.init(firstLength+second.length(), errorCode)) { 187 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, 188 safeMiddle, buffer, errorCode); 189 } 190 } // The ReorderingBuffer destructor finalizes the first string. 191 if(U_FAILURE(errorCode)) { 192 // Restore the modified suffix of the first string. 193 first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle); 194 } 195 return first; 196 } 197 virtual void 198 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 199 UnicodeString &safeMiddle, 200 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; 201 virtual UBool 202 getDecomposition(UChar32 c, UnicodeString &decomposition) const { 203 UChar buffer[4]; 204 int32_t length; 205 const UChar *d=impl.getDecomposition(c, buffer, length); 206 if(d==NULL) { 207 return FALSE; 208 } 209 if(d==buffer) { 210 decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c) 211 } else { 212 decomposition.setTo(FALSE, d, length); // read-only alias 213 } 214 return TRUE; 215 } 216 virtual UBool 217 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const { 218 UChar buffer[30]; 219 int32_t length; 220 const UChar *d=impl.getRawDecomposition(c, buffer, length); 221 if(d==NULL) { 222 return FALSE; 223 } 224 if(d==buffer) { 225 decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition) 226 } else { 227 decomposition.setTo(FALSE, d, length); // read-only alias 228 } 229 return TRUE; 230 } 231 virtual UChar32 232 composePair(UChar32 a, UChar32 b) const { 233 return impl.composePair(a, b); 234 } 235 236 virtual uint8_t 237 getCombiningClass(UChar32 c) const { 238 return impl.getCC(impl.getNorm16(c)); 239 } 240 241 // quick checks 242 virtual UBool 243 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { 244 if(U_FAILURE(errorCode)) { 245 return FALSE; 246 } 247 const UChar *sArray=s.getBuffer(); 248 if(sArray==NULL) { 249 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 250 return FALSE; 251 } 252 const UChar *sLimit=sArray+s.length(); 253 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); 254 } 255 virtual UNormalizationCheckResult 256 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { 257 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; 258 } 259 virtual int32_t 260 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { 261 if(U_FAILURE(errorCode)) { 262 return 0; 263 } 264 const UChar *sArray=s.getBuffer(); 265 if(sArray==NULL) { 266 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 267 return 0; 268 } 269 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); 270 } 271 virtual const UChar * 272 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0; 273 274 virtual UNormalizationCheckResult getQuickCheck(UChar32) const { 275 return UNORM_YES; 276 } 277 278 const Normalizer2Impl &impl; 279}; 280 281Normalizer2WithImpl::~Normalizer2WithImpl() {} 282 283class DecomposeNormalizer2 : public Normalizer2WithImpl { 284public: 285 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} 286 virtual ~DecomposeNormalizer2(); 287 288private: 289 virtual void 290 normalize(const UChar *src, const UChar *limit, 291 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 292 impl.decompose(src, limit, &buffer, errorCode); 293 } 294 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 295 virtual void 296 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 297 UnicodeString &safeMiddle, 298 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 299 impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); 300 } 301 virtual const UChar * 302 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { 303 return impl.decompose(src, limit, NULL, errorCode); 304 } 305 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 306 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { 307 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; 308 } 309 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); } 310 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); } 311 virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } 312}; 313 314DecomposeNormalizer2::~DecomposeNormalizer2() {} 315 316class ComposeNormalizer2 : public Normalizer2WithImpl { 317public: 318 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : 319 Normalizer2WithImpl(ni), onlyContiguous(fcc) {} 320 virtual ~ComposeNormalizer2(); 321 322private: 323 virtual void 324 normalize(const UChar *src, const UChar *limit, 325 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 326 impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); 327 } 328 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 329 virtual void 330 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 331 UnicodeString &safeMiddle, 332 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 333 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode); 334 } 335 336 virtual UBool 337 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { 338 if(U_FAILURE(errorCode)) { 339 return FALSE; 340 } 341 const UChar *sArray=s.getBuffer(); 342 if(sArray==NULL) { 343 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 344 return FALSE; 345 } 346 UnicodeString temp; 347 ReorderingBuffer buffer(impl, temp); 348 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization 349 return FALSE; 350 } 351 return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode); 352 } 353 virtual UNormalizationCheckResult 354 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { 355 if(U_FAILURE(errorCode)) { 356 return UNORM_MAYBE; 357 } 358 const UChar *sArray=s.getBuffer(); 359 if(sArray==NULL) { 360 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 361 return UNORM_MAYBE; 362 } 363 UNormalizationCheckResult qcResult=UNORM_YES; 364 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult); 365 return qcResult; 366 } 367 virtual const UChar * 368 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const { 369 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); 370 } 371 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 372 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { 373 return impl.getCompQuickCheck(impl.getNorm16(c)); 374 } 375 virtual UBool hasBoundaryBefore(UChar32 c) const { 376 return impl.hasCompBoundaryBefore(c); 377 } 378 virtual UBool hasBoundaryAfter(UChar32 c) const { 379 return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE); 380 } 381 virtual UBool isInert(UChar32 c) const { 382 return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE); 383 } 384 385 const UBool onlyContiguous; 386}; 387 388ComposeNormalizer2::~ComposeNormalizer2() {} 389 390class FCDNormalizer2 : public Normalizer2WithImpl { 391public: 392 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} 393 virtual ~FCDNormalizer2(); 394 395private: 396 virtual void 397 normalize(const UChar *src, const UChar *limit, 398 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 399 impl.makeFCD(src, limit, &buffer, errorCode); 400 } 401 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 402 virtual void 403 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 404 UnicodeString &safeMiddle, 405 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 406 impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); 407 } 408 virtual const UChar * 409 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { 410 return impl.makeFCD(src, limit, NULL, errorCode); 411 } 412 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 413 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); } 414 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); } 415 virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } 416}; 417 418FCDNormalizer2::~FCDNormalizer2() {} 419 420// instance cache ---------------------------------------------------------- *** 421 422struct Norm2AllModes : public UMemory { 423 static Norm2AllModes *createInstance(const char *packageName, 424 const char *name, 425 UErrorCode &errorCode); 426 Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {} 427 428 Normalizer2Impl impl; 429 ComposeNormalizer2 comp; 430 DecomposeNormalizer2 decomp; 431 FCDNormalizer2 fcd; 432 ComposeNormalizer2 fcc; 433}; 434 435Norm2AllModes * 436Norm2AllModes::createInstance(const char *packageName, 437 const char *name, 438 UErrorCode &errorCode) { 439 if(U_FAILURE(errorCode)) { 440 return NULL; 441 } 442 LocalPointer<Norm2AllModes> allModes(new Norm2AllModes); 443 if(allModes.isNull()) { 444 errorCode=U_MEMORY_ALLOCATION_ERROR; 445 return NULL; 446 } 447 allModes->impl.load(packageName, name, errorCode); 448 return U_SUCCESS(errorCode) ? allModes.orphan() : NULL; 449} 450 451U_CDECL_BEGIN 452static UBool U_CALLCONV uprv_normalizer2_cleanup(); 453U_CDECL_END 454 455class Norm2AllModesSingleton : public TriStateSingletonWrapper<Norm2AllModes> { 456public: 457 Norm2AllModesSingleton(TriStateSingleton &s, const char *n) : 458 TriStateSingletonWrapper<Norm2AllModes>(s), name(n) {} 459 Norm2AllModes *getInstance(UErrorCode &errorCode) { 460 return TriStateSingletonWrapper<Norm2AllModes>::getInstance(createInstance, name, errorCode); 461 } 462private: 463 static void *createInstance(const void *context, UErrorCode &errorCode) { 464 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); 465 return Norm2AllModes::createInstance(NULL, (const char *)context, errorCode); 466 } 467 468 const char *name; 469}; 470 471STATIC_TRI_STATE_SINGLETON(nfcSingleton); 472STATIC_TRI_STATE_SINGLETON(nfkcSingleton); 473STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton); 474 475class Norm2Singleton : public SimpleSingletonWrapper<Normalizer2> { 476public: 477 Norm2Singleton(SimpleSingleton &s) : SimpleSingletonWrapper<Normalizer2>(s) {} 478 Normalizer2 *getInstance(UErrorCode &errorCode) { 479 return SimpleSingletonWrapper<Normalizer2>::getInstance(createInstance, NULL, errorCode); 480 } 481private: 482 static void *createInstance(const void *, UErrorCode &errorCode) { 483 Normalizer2 *noop=new NoopNormalizer2; 484 if(noop==NULL) { 485 errorCode=U_MEMORY_ALLOCATION_ERROR; 486 } 487 ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); 488 return noop; 489 } 490}; 491 492STATIC_SIMPLE_SINGLETON(noopSingleton); 493 494static UHashtable *cache=NULL; 495 496U_CDECL_BEGIN 497 498static void U_CALLCONV deleteNorm2AllModes(void *allModes) { 499 delete (Norm2AllModes *)allModes; 500} 501 502static UBool U_CALLCONV uprv_normalizer2_cleanup() { 503 Norm2AllModesSingleton(nfcSingleton, NULL).deleteInstance(); 504 Norm2AllModesSingleton(nfkcSingleton, NULL).deleteInstance(); 505 Norm2AllModesSingleton(nfkc_cfSingleton, NULL).deleteInstance(); 506 Norm2Singleton(noopSingleton).deleteInstance(); 507 uhash_close(cache); 508 cache=NULL; 509 return TRUE; 510} 511 512U_CDECL_END 513 514const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) { 515 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 516 return allModes!=NULL ? &allModes->comp : NULL; 517} 518 519const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) { 520 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 521 return allModes!=NULL ? &allModes->decomp : NULL; 522} 523 524const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { 525 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 526 return allModes!=NULL ? &allModes->fcd : NULL; 527} 528 529const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { 530 Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 531 return allModes!=NULL ? &allModes->fcc : NULL; 532} 533 534const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) { 535 Norm2AllModes *allModes= 536 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); 537 return allModes!=NULL ? &allModes->comp : NULL; 538} 539 540const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) { 541 Norm2AllModes *allModes= 542 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); 543 return allModes!=NULL ? &allModes->decomp : NULL; 544} 545 546const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) { 547 Norm2AllModes *allModes= 548 Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); 549 return allModes!=NULL ? &allModes->comp : NULL; 550} 551 552const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) { 553 return Norm2Singleton(noopSingleton).getInstance(errorCode); 554} 555 556const Normalizer2 * 557Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) { 558 if(U_FAILURE(errorCode)) { 559 return NULL; 560 } 561 switch(mode) { 562 case UNORM_NFD: 563 return getNFDInstance(errorCode); 564 case UNORM_NFKD: 565 return getNFKDInstance(errorCode); 566 case UNORM_NFC: 567 return getNFCInstance(errorCode); 568 case UNORM_NFKC: 569 return getNFKCInstance(errorCode); 570 case UNORM_FCD: 571 return getFCDInstance(errorCode); 572 default: // UNORM_NONE 573 return getNoopInstance(errorCode); 574 } 575} 576 577const Normalizer2Impl * 578Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { 579 Norm2AllModes *allModes= 580 Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 581 return allModes!=NULL ? &allModes->impl : NULL; 582} 583 584const Normalizer2Impl * 585Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) { 586 Norm2AllModes *allModes= 587 Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); 588 return allModes!=NULL ? &allModes->impl : NULL; 589} 590 591const Normalizer2Impl * 592Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) { 593 Norm2AllModes *allModes= 594 Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); 595 return allModes!=NULL ? &allModes->impl : NULL; 596} 597 598const Normalizer2Impl * 599Normalizer2Factory::getImpl(const Normalizer2 *norm2) { 600 return &((Normalizer2WithImpl *)norm2)->impl; 601} 602 603const Normalizer2 * 604Normalizer2::getNFCInstance(UErrorCode &errorCode) { 605 return Normalizer2Factory::getNFCInstance(errorCode); 606} 607 608const Normalizer2 * 609Normalizer2::getNFDInstance(UErrorCode &errorCode) { 610 return Normalizer2Factory::getNFDInstance(errorCode); 611} 612 613const Normalizer2 * 614Normalizer2::getNFKCInstance(UErrorCode &errorCode) { 615 return Normalizer2Factory::getNFKCInstance(errorCode); 616} 617 618const Normalizer2 * 619Normalizer2::getNFKDInstance(UErrorCode &errorCode) { 620 return Normalizer2Factory::getNFKDInstance(errorCode); 621} 622 623const Normalizer2 * 624Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) { 625 return Normalizer2Factory::getNFKC_CFInstance(errorCode); 626} 627 628const Normalizer2 * 629Normalizer2::getInstance(const char *packageName, 630 const char *name, 631 UNormalization2Mode mode, 632 UErrorCode &errorCode) { 633 if(U_FAILURE(errorCode)) { 634 return NULL; 635 } 636 if(name==NULL || *name==0) { 637 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 638 return NULL; 639 } 640 Norm2AllModes *allModes=NULL; 641 if(packageName==NULL) { 642 if(0==uprv_strcmp(name, "nfc")) { 643 allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode); 644 } else if(0==uprv_strcmp(name, "nfkc")) { 645 allModes=Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode); 646 } else if(0==uprv_strcmp(name, "nfkc_cf")) { 647 allModes=Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode); 648 } 649 } 650 if(allModes==NULL && U_SUCCESS(errorCode)) { 651 { 652 Mutex lock; 653 if(cache!=NULL) { 654 allModes=(Norm2AllModes *)uhash_get(cache, name); 655 } 656 } 657 if(allModes==NULL) { 658 LocalPointer<Norm2AllModes> localAllModes( 659 Norm2AllModes::createInstance(packageName, name, errorCode)); 660 if(U_SUCCESS(errorCode)) { 661 Mutex lock; 662 if(cache==NULL) { 663 cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode); 664 if(U_FAILURE(errorCode)) { 665 return NULL; 666 } 667 uhash_setKeyDeleter(cache, uprv_free); 668 uhash_setValueDeleter(cache, deleteNorm2AllModes); 669 } 670 void *temp=uhash_get(cache, name); 671 if(temp==NULL) { 672 int32_t keyLength=uprv_strlen(name)+1; 673 char *nameCopy=(char *)uprv_malloc(keyLength); 674 if(nameCopy==NULL) { 675 errorCode=U_MEMORY_ALLOCATION_ERROR; 676 return NULL; 677 } 678 uprv_memcpy(nameCopy, name, keyLength); 679 uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode); 680 } else { 681 // race condition 682 allModes=(Norm2AllModes *)temp; 683 } 684 } 685 } 686 } 687 if(allModes!=NULL && U_SUCCESS(errorCode)) { 688 switch(mode) { 689 case UNORM2_COMPOSE: 690 return &allModes->comp; 691 case UNORM2_DECOMPOSE: 692 return &allModes->decomp; 693 case UNORM2_FCD: 694 return &allModes->fcd; 695 case UNORM2_COMPOSE_CONTIGUOUS: 696 return &allModes->fcc; 697 default: 698 break; // do nothing 699 } 700 } 701 return NULL; 702} 703 704U_NAMESPACE_END 705 706// C API ------------------------------------------------------------------- *** 707 708U_NAMESPACE_USE 709 710U_CAPI const UNormalizer2 * U_EXPORT2 711unorm2_getNFCInstance(UErrorCode *pErrorCode) { 712 return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode); 713} 714 715U_CAPI const UNormalizer2 * U_EXPORT2 716unorm2_getNFDInstance(UErrorCode *pErrorCode) { 717 return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode); 718} 719 720U_CAPI const UNormalizer2 * U_EXPORT2 721unorm2_getNFKCInstance(UErrorCode *pErrorCode) { 722 return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode); 723} 724 725U_CAPI const UNormalizer2 * U_EXPORT2 726unorm2_getNFKDInstance(UErrorCode *pErrorCode) { 727 return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode); 728} 729 730U_CAPI const UNormalizer2 * U_EXPORT2 731unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) { 732 return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode); 733} 734 735U_CAPI const UNormalizer2 * U_EXPORT2 736unorm2_getInstance(const char *packageName, 737 const char *name, 738 UNormalization2Mode mode, 739 UErrorCode *pErrorCode) { 740 return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode); 741} 742 743U_CAPI void U_EXPORT2 744unorm2_close(UNormalizer2 *norm2) { 745 delete (Normalizer2 *)norm2; 746} 747 748U_CAPI int32_t U_EXPORT2 749unorm2_normalize(const UNormalizer2 *norm2, 750 const UChar *src, int32_t length, 751 UChar *dest, int32_t capacity, 752 UErrorCode *pErrorCode) { 753 if(U_FAILURE(*pErrorCode)) { 754 return 0; 755 } 756 if( (src==NULL ? length!=0 : length<-1) || 757 (dest==NULL ? capacity!=0 : capacity<0) || 758 (src==dest && src!=NULL) 759 ) { 760 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 761 return 0; 762 } 763 UnicodeString destString(dest, 0, capacity); 764 // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash. 765 if(length!=0) { 766 const Normalizer2 *n2=(const Normalizer2 *)norm2; 767 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2); 768 if(n2wi!=NULL) { 769 // Avoid duplicate argument checking and support NUL-terminated src. 770 ReorderingBuffer buffer(n2wi->impl, destString); 771 if(buffer.init(length, *pErrorCode)) { 772 n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode); 773 } 774 } else { 775 UnicodeString srcString(length<0, src, length); 776 n2->normalize(srcString, destString, *pErrorCode); 777 } 778 } 779 return destString.extract(dest, capacity, *pErrorCode); 780} 781 782static int32_t 783normalizeSecondAndAppend(const UNormalizer2 *norm2, 784 UChar *first, int32_t firstLength, int32_t firstCapacity, 785 const UChar *second, int32_t secondLength, 786 UBool doNormalize, 787 UErrorCode *pErrorCode) { 788 if(U_FAILURE(*pErrorCode)) { 789 return 0; 790 } 791 if( (second==NULL ? secondLength!=0 : secondLength<-1) || 792 (first==NULL ? (firstCapacity!=0 || firstLength!=0) : 793 (firstCapacity<0 || firstLength<-1)) || 794 (first==second && first!=NULL) 795 ) { 796 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 797 return 0; 798 } 799 UnicodeString firstString(first, firstLength, firstCapacity); 800 firstLength=firstString.length(); // In case it was -1. 801 // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash. 802 if(secondLength!=0) { 803 const Normalizer2 *n2=(const Normalizer2 *)norm2; 804 const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2); 805 if(n2wi!=NULL) { 806 // Avoid duplicate argument checking and support NUL-terminated src. 807 UnicodeString safeMiddle; 808 { 809 ReorderingBuffer buffer(n2wi->impl, firstString); 810 if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1 811 n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL, 812 doNormalize, safeMiddle, buffer, *pErrorCode); 813 } 814 } // The ReorderingBuffer destructor finalizes firstString. 815 if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) { 816 // Restore the modified suffix of the first string. 817 // This does not restore first[] array contents between firstLength and firstCapacity. 818 // (That might be uninitialized memory, as far as we know.) 819 if(first!=NULL) { /* don't dereference NULL */ 820 safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length()); 821 if(firstLength<firstCapacity) { 822 first[firstLength]=0; // NUL-terminate in case it was originally. 823 } 824 } 825 } 826 } else { 827 UnicodeString secondString(secondLength<0, second, secondLength); 828 if(doNormalize) { 829 n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode); 830 } else { 831 n2->append(firstString, secondString, *pErrorCode); 832 } 833 } 834 } 835 return firstString.extract(first, firstCapacity, *pErrorCode); 836} 837 838U_CAPI int32_t U_EXPORT2 839unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, 840 UChar *first, int32_t firstLength, int32_t firstCapacity, 841 const UChar *second, int32_t secondLength, 842 UErrorCode *pErrorCode) { 843 return normalizeSecondAndAppend(norm2, 844 first, firstLength, firstCapacity, 845 second, secondLength, 846 TRUE, pErrorCode); 847} 848 849U_CAPI int32_t U_EXPORT2 850unorm2_append(const UNormalizer2 *norm2, 851 UChar *first, int32_t firstLength, int32_t firstCapacity, 852 const UChar *second, int32_t secondLength, 853 UErrorCode *pErrorCode) { 854 return normalizeSecondAndAppend(norm2, 855 first, firstLength, firstCapacity, 856 second, secondLength, 857 FALSE, pErrorCode); 858} 859 860U_CAPI int32_t U_EXPORT2 861unorm2_getDecomposition(const UNormalizer2 *norm2, 862 UChar32 c, UChar *decomposition, int32_t capacity, 863 UErrorCode *pErrorCode) { 864 if(U_FAILURE(*pErrorCode)) { 865 return 0; 866 } 867 if(decomposition==NULL ? capacity!=0 : capacity<0) { 868 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 869 return 0; 870 } 871 UnicodeString destString(decomposition, 0, capacity); 872 if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) { 873 return destString.extract(decomposition, capacity, *pErrorCode); 874 } else { 875 return -1; 876 } 877} 878 879U_CAPI int32_t U_EXPORT2 880unorm2_getRawDecomposition(const UNormalizer2 *norm2, 881 UChar32 c, UChar *decomposition, int32_t capacity, 882 UErrorCode *pErrorCode) { 883 if(U_FAILURE(*pErrorCode)) { 884 return 0; 885 } 886 if(decomposition==NULL ? capacity!=0 : capacity<0) { 887 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 888 return 0; 889 } 890 UnicodeString destString(decomposition, 0, capacity); 891 if(reinterpret_cast<const Normalizer2 *>(norm2)->getRawDecomposition(c, destString)) { 892 return destString.extract(decomposition, capacity, *pErrorCode); 893 } else { 894 return -1; 895 } 896} 897 898U_CAPI UChar32 U_EXPORT2 899unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) { 900 return reinterpret_cast<const Normalizer2 *>(norm2)->composePair(a, b); 901} 902 903U_CAPI uint8_t U_EXPORT2 904unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) { 905 return reinterpret_cast<const Normalizer2 *>(norm2)->getCombiningClass(c); 906} 907 908U_CAPI UBool U_EXPORT2 909unorm2_isNormalized(const UNormalizer2 *norm2, 910 const UChar *s, int32_t length, 911 UErrorCode *pErrorCode) { 912 if(U_FAILURE(*pErrorCode)) { 913 return 0; 914 } 915 if((s==NULL && length!=0) || length<-1) { 916 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 917 return 0; 918 } 919 UnicodeString sString(length<0, s, length); 920 return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode); 921} 922 923U_CAPI UNormalizationCheckResult U_EXPORT2 924unorm2_quickCheck(const UNormalizer2 *norm2, 925 const UChar *s, int32_t length, 926 UErrorCode *pErrorCode) { 927 if(U_FAILURE(*pErrorCode)) { 928 return UNORM_NO; 929 } 930 if((s==NULL && length!=0) || length<-1) { 931 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 932 return UNORM_NO; 933 } 934 UnicodeString sString(length<0, s, length); 935 return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode); 936} 937 938U_CAPI int32_t U_EXPORT2 939unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, 940 const UChar *s, int32_t length, 941 UErrorCode *pErrorCode) { 942 if(U_FAILURE(*pErrorCode)) { 943 return 0; 944 } 945 if((s==NULL && length!=0) || length<-1) { 946 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 947 return 0; 948 } 949 UnicodeString sString(length<0, s, length); 950 return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode); 951} 952 953U_CAPI UBool U_EXPORT2 954unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) { 955 return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c); 956} 957 958U_CAPI UBool U_EXPORT2 959unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) { 960 return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c); 961} 962 963U_CAPI UBool U_EXPORT2 964unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) { 965 return ((const Normalizer2 *)norm2)->isInert(c); 966} 967 968// Some properties APIs ---------------------------------------------------- *** 969 970U_CAPI uint8_t U_EXPORT2 971u_getCombiningClass(UChar32 c) { 972 UErrorCode errorCode=U_ZERO_ERROR; 973 const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode); 974 if(U_SUCCESS(errorCode)) { 975 return nfd->getCombiningClass(c); 976 } else { 977 return 0; 978 } 979} 980 981U_CFUNC UNormalizationCheckResult 982unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { 983 if(mode<=UNORM_NONE || UNORM_FCD<=mode) { 984 return UNORM_YES; 985 } 986 UErrorCode errorCode=U_ZERO_ERROR; 987 const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode); 988 if(U_SUCCESS(errorCode)) { 989 return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c); 990 } else { 991 return UNORM_MAYBE; 992 } 993} 994 995U_CFUNC uint16_t 996unorm_getFCD16(UChar32 c) { 997 UErrorCode errorCode=U_ZERO_ERROR; 998 const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); 999 if(U_SUCCESS(errorCode)) { 1000 return impl->getFCD16(c); 1001 } else { 1002 return 0; 1003 } 1004} 1005 1006#endif // !UCONFIG_NO_NORMALIZATION 1007