1/*
2*******************************************************************************
3*
4*   Copyright (C) 2009-2013, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  normalizer2.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2009nov22
14*   created by: Markus W. Scherer
15*/
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_NORMALIZATION
20
21#include "unicode/localpointer.h"
22#include "unicode/normalizer2.h"
23#include "unicode/unistr.h"
24#include "unicode/unorm.h"
25#include "cpputils.h"
26#include "cstring.h"
27#include "mutex.h"
28#include "normalizer2impl.h"
29#include "uassert.h"
30#include "ucln_cmn.h"
31#include "uhash.h"
32
33U_NAMESPACE_BEGIN
34
35// Public API dispatch via Normalizer2 subclasses -------------------------- ***
36
37Normalizer2::~Normalizer2() {}
38
39UBool
40Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const {
41    return FALSE;
42}
43
44UChar32
45Normalizer2::composePair(UChar32, UChar32) const {
46    return U_SENTINEL;
47}
48
49uint8_t
50Normalizer2::getCombiningClass(UChar32 /*c*/) const {
51    return 0;
52}
53
54// Normalizer2 implementation for the old UNORM_NONE.
55class NoopNormalizer2 : public Normalizer2 {
56    virtual ~NoopNormalizer2();
57
58    virtual UnicodeString &
59    normalize(const UnicodeString &src,
60              UnicodeString &dest,
61              UErrorCode &errorCode) const {
62        if(U_SUCCESS(errorCode)) {
63            if(&dest!=&src) {
64                dest=src;
65            } else {
66                errorCode=U_ILLEGAL_ARGUMENT_ERROR;
67            }
68        }
69        return dest;
70    }
71    virtual UnicodeString &
72    normalizeSecondAndAppend(UnicodeString &first,
73                             const UnicodeString &second,
74                             UErrorCode &errorCode) const {
75        if(U_SUCCESS(errorCode)) {
76            if(&first!=&second) {
77                first.append(second);
78            } else {
79                errorCode=U_ILLEGAL_ARGUMENT_ERROR;
80            }
81        }
82        return first;
83    }
84    virtual UnicodeString &
85    append(UnicodeString &first,
86           const UnicodeString &second,
87           UErrorCode &errorCode) const {
88        if(U_SUCCESS(errorCode)) {
89            if(&first!=&second) {
90                first.append(second);
91            } else {
92                errorCode=U_ILLEGAL_ARGUMENT_ERROR;
93            }
94        }
95        return first;
96    }
97    virtual UBool
98    getDecomposition(UChar32, UnicodeString &) const {
99        return FALSE;
100    }
101    // No need to override the default getRawDecomposition().
102    virtual UBool
103    isNormalized(const UnicodeString &, UErrorCode &) const {
104        return TRUE;
105    }
106    virtual UNormalizationCheckResult
107    quickCheck(const UnicodeString &, UErrorCode &) const {
108        return UNORM_YES;
109    }
110    virtual int32_t
111    spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const {
112        return s.length();
113    }
114    virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; }
115    virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; }
116    virtual UBool isInert(UChar32) const { return TRUE; }
117};
118
119NoopNormalizer2::~NoopNormalizer2() {}
120
121// Intermediate class:
122// Has Normalizer2Impl and does boilerplate argument checking and setup.
123class Normalizer2WithImpl : public Normalizer2 {
124public:
125    Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
126    virtual ~Normalizer2WithImpl();
127
128    // normalize
129    virtual UnicodeString &
130    normalize(const UnicodeString &src,
131              UnicodeString &dest,
132              UErrorCode &errorCode) const {
133        if(U_FAILURE(errorCode)) {
134            dest.setToBogus();
135            return dest;
136        }
137        const UChar *sArray=src.getBuffer();
138        if(&dest==&src || sArray==NULL) {
139            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
140            dest.setToBogus();
141            return dest;
142        }
143        dest.remove();
144        ReorderingBuffer buffer(impl, dest);
145        if(buffer.init(src.length(), errorCode)) {
146            normalize(sArray, sArray+src.length(), buffer, errorCode);
147        }
148        return dest;
149    }
150    virtual void
151    normalize(const UChar *src, const UChar *limit,
152              ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
153
154    // normalize and append
155    virtual UnicodeString &
156    normalizeSecondAndAppend(UnicodeString &first,
157                             const UnicodeString &second,
158                             UErrorCode &errorCode) const {
159        return normalizeSecondAndAppend(first, second, TRUE, errorCode);
160    }
161    virtual UnicodeString &
162    append(UnicodeString &first,
163           const UnicodeString &second,
164           UErrorCode &errorCode) const {
165        return normalizeSecondAndAppend(first, second, FALSE, errorCode);
166    }
167    UnicodeString &
168    normalizeSecondAndAppend(UnicodeString &first,
169                             const UnicodeString &second,
170                             UBool doNormalize,
171                             UErrorCode &errorCode) const {
172        uprv_checkCanGetBuffer(first, errorCode);
173        if(U_FAILURE(errorCode)) {
174            return first;
175        }
176        const UChar *secondArray=second.getBuffer();
177        if(&first==&second || secondArray==NULL) {
178            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
179            return first;
180        }
181        int32_t firstLength=first.length();
182        UnicodeString safeMiddle;
183        {
184            ReorderingBuffer buffer(impl, first);
185            if(buffer.init(firstLength+second.length(), errorCode)) {
186                normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
187                                   safeMiddle, buffer, errorCode);
188            }
189        }  // The ReorderingBuffer destructor finalizes the first string.
190        if(U_FAILURE(errorCode)) {
191            // Restore the modified suffix of the first string.
192            first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
193        }
194        return first;
195    }
196    virtual void
197    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
198                       UnicodeString &safeMiddle,
199                       ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
200    virtual UBool
201    getDecomposition(UChar32 c, UnicodeString &decomposition) const {
202        UChar buffer[4];
203        int32_t length;
204        const UChar *d=impl.getDecomposition(c, buffer, length);
205        if(d==NULL) {
206            return FALSE;
207        }
208        if(d==buffer) {
209            decomposition.setTo(buffer, length);  // copy the string (Jamos from Hangul syllable c)
210        } else {
211            decomposition.setTo(FALSE, d, length);  // read-only alias
212        }
213        return TRUE;
214    }
215    virtual UBool
216    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
217        UChar buffer[30];
218        int32_t length;
219        const UChar *d=impl.getRawDecomposition(c, buffer, length);
220        if(d==NULL) {
221            return FALSE;
222        }
223        if(d==buffer) {
224            decomposition.setTo(buffer, length);  // copy the string (algorithmic decomposition)
225        } else {
226            decomposition.setTo(FALSE, d, length);  // read-only alias
227        }
228        return TRUE;
229    }
230    virtual UChar32
231    composePair(UChar32 a, UChar32 b) const {
232        return impl.composePair(a, b);
233    }
234
235    virtual uint8_t
236    getCombiningClass(UChar32 c) const {
237        return impl.getCC(impl.getNorm16(c));
238    }
239
240    // quick checks
241    virtual UBool
242    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
243        if(U_FAILURE(errorCode)) {
244            return FALSE;
245        }
246        const UChar *sArray=s.getBuffer();
247        if(sArray==NULL) {
248            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
249            return FALSE;
250        }
251        const UChar *sLimit=sArray+s.length();
252        return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
253    }
254    virtual UNormalizationCheckResult
255    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
256        return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
257    }
258    virtual int32_t
259    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
260        if(U_FAILURE(errorCode)) {
261            return 0;
262        }
263        const UChar *sArray=s.getBuffer();
264        if(sArray==NULL) {
265            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
266            return 0;
267        }
268        return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
269    }
270    virtual const UChar *
271    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
272
273    virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
274        return UNORM_YES;
275    }
276
277    const Normalizer2Impl &impl;
278};
279
280Normalizer2WithImpl::~Normalizer2WithImpl() {}
281
282class DecomposeNormalizer2 : public Normalizer2WithImpl {
283public:
284    DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
285    virtual ~DecomposeNormalizer2();
286
287private:
288    virtual void
289    normalize(const UChar *src, const UChar *limit,
290              ReorderingBuffer &buffer, UErrorCode &errorCode) const {
291        impl.decompose(src, limit, &buffer, errorCode);
292    }
293    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
294    virtual void
295    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
296                       UnicodeString &safeMiddle,
297                       ReorderingBuffer &buffer, UErrorCode &errorCode) const {
298        impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
299    }
300    virtual const UChar *
301    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
302        return impl.decompose(src, limit, NULL, errorCode);
303    }
304    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
305    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
306        return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
307    }
308    virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
309    virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
310    virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
311};
312
313DecomposeNormalizer2::~DecomposeNormalizer2() {}
314
315class ComposeNormalizer2 : public Normalizer2WithImpl {
316public:
317    ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
318        Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
319    virtual ~ComposeNormalizer2();
320
321private:
322    virtual void
323    normalize(const UChar *src, const UChar *limit,
324              ReorderingBuffer &buffer, UErrorCode &errorCode) const {
325        impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
326    }
327    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
328    virtual void
329    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
330                       UnicodeString &safeMiddle,
331                       ReorderingBuffer &buffer, UErrorCode &errorCode) const {
332        impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
333    }
334
335    virtual UBool
336    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
337        if(U_FAILURE(errorCode)) {
338            return FALSE;
339        }
340        const UChar *sArray=s.getBuffer();
341        if(sArray==NULL) {
342            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
343            return FALSE;
344        }
345        UnicodeString temp;
346        ReorderingBuffer buffer(impl, temp);
347        if(!buffer.init(5, errorCode)) {  // small destCapacity for substring normalization
348            return FALSE;
349        }
350        return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
351    }
352    virtual UNormalizationCheckResult
353    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
354        if(U_FAILURE(errorCode)) {
355            return UNORM_MAYBE;
356        }
357        const UChar *sArray=s.getBuffer();
358        if(sArray==NULL) {
359            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
360            return UNORM_MAYBE;
361        }
362        UNormalizationCheckResult qcResult=UNORM_YES;
363        impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
364        return qcResult;
365    }
366    virtual const UChar *
367    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
368        return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
369    }
370    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
371    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
372        return impl.getCompQuickCheck(impl.getNorm16(c));
373    }
374    virtual UBool hasBoundaryBefore(UChar32 c) const {
375        return impl.hasCompBoundaryBefore(c);
376    }
377    virtual UBool hasBoundaryAfter(UChar32 c) const {
378        return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
379    }
380    virtual UBool isInert(UChar32 c) const {
381        return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
382    }
383
384    const UBool onlyContiguous;
385};
386
387ComposeNormalizer2::~ComposeNormalizer2() {}
388
389class FCDNormalizer2 : public Normalizer2WithImpl {
390public:
391    FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
392    virtual ~FCDNormalizer2();
393
394private:
395    virtual void
396    normalize(const UChar *src, const UChar *limit,
397              ReorderingBuffer &buffer, UErrorCode &errorCode) const {
398        impl.makeFCD(src, limit, &buffer, errorCode);
399    }
400    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
401    virtual void
402    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
403                       UnicodeString &safeMiddle,
404                       ReorderingBuffer &buffer, UErrorCode &errorCode) const {
405        impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
406    }
407    virtual const UChar *
408    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
409        return impl.makeFCD(src, limit, NULL, errorCode);
410    }
411    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
412    virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
413    virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
414    virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
415};
416
417FCDNormalizer2::~FCDNormalizer2() {}
418
419// instance cache ---------------------------------------------------------- ***
420
421struct Norm2AllModes : public UMemory {
422    static Norm2AllModes *createInstance(const char *packageName,
423                                         const char *name,
424                                         UErrorCode &errorCode);
425    Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {}
426
427    Normalizer2Impl impl;
428    ComposeNormalizer2 comp;
429    DecomposeNormalizer2 decomp;
430    FCDNormalizer2 fcd;
431    ComposeNormalizer2 fcc;
432};
433
434Norm2AllModes *
435Norm2AllModes::createInstance(const char *packageName,
436                              const char *name,
437                              UErrorCode &errorCode) {
438    if(U_FAILURE(errorCode)) {
439        return NULL;
440    }
441    LocalPointer<Norm2AllModes> allModes(new Norm2AllModes);
442    if(allModes.isNull()) {
443        errorCode=U_MEMORY_ALLOCATION_ERROR;
444        return NULL;
445    }
446    allModes->impl.load(packageName, name, errorCode);
447    return U_SUCCESS(errorCode) ? allModes.orphan() : NULL;
448}
449
450U_CDECL_BEGIN
451static UBool U_CALLCONV uprv_normalizer2_cleanup();
452U_CDECL_END
453
454
455static Norm2AllModes *nfcSingleton;
456static Norm2AllModes *nfkcSingleton;
457static Norm2AllModes *nfkc_cfSingleton;
458static Normalizer2   *noopSingleton;
459static UHashtable    *cache=NULL;
460
461static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER;
462static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
463static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
464static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER;
465
466// UInitOnce singleton initialization function
467static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
468    if (uprv_strcmp(what, "nfc") == 0) {
469        nfcSingleton     = Norm2AllModes::createInstance(NULL, "nfc", errorCode);
470    } else if (uprv_strcmp(what, "nfkc") == 0) {
471        nfkcSingleton    = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
472    } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
473        nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
474    } else if (uprv_strcmp(what, "noop") == 0) {
475        noopSingleton    = new NoopNormalizer2;
476    } else {
477        U_ASSERT(FALSE);   // Unknown singleton
478    }
479    ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
480}
481
482U_CDECL_BEGIN
483
484static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
485    delete (Norm2AllModes *)allModes;
486}
487
488static UBool U_CALLCONV uprv_normalizer2_cleanup() {
489    delete nfcSingleton;
490    nfcSingleton = NULL;
491    delete nfkcSingleton;
492    nfkcSingleton = NULL;
493    delete nfkc_cfSingleton;
494    nfkc_cfSingleton = NULL;
495    delete noopSingleton;
496    noopSingleton = NULL;
497    uhash_close(cache);
498    cache=NULL;
499    nfcInitOnce.reset();
500    nfkcInitOnce.reset();
501    nfkc_cfInitOnce.reset();
502    noopInitOnce.reset();
503    return TRUE;
504}
505
506U_CDECL_END
507
508const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) {
509    umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
510    return nfcSingleton!=NULL ? &nfcSingleton->comp : NULL;
511}
512
513const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) {
514    umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
515    return nfcSingleton!=NULL ? &nfcSingleton->decomp : NULL;
516}
517
518const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
519    umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
520    return nfcSingleton!=NULL ? &nfcSingleton->fcd : NULL;
521}
522
523const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
524    umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
525    return nfcSingleton!=NULL ? &nfcSingleton->fcc : NULL;
526}
527
528const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) {
529    umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
530    return nfkcSingleton!=NULL ? &nfkcSingleton->comp : NULL;
531}
532
533const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) {
534    umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
535    return nfkcSingleton!=NULL ? &nfkcSingleton->decomp : NULL;
536}
537
538const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) {
539    umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
540    return nfkc_cfSingleton!=NULL ? &nfkc_cfSingleton->comp : NULL;
541}
542
543const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
544    umtx_initOnce(noopInitOnce, &initSingletons, "noop", errorCode);
545    return noopSingleton;
546}
547
548const Normalizer2 *
549Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
550    if(U_FAILURE(errorCode)) {
551        return NULL;
552    }
553    switch(mode) {
554    case UNORM_NFD:
555        return getNFDInstance(errorCode);
556    case UNORM_NFKD:
557        return getNFKDInstance(errorCode);
558    case UNORM_NFC:
559        return getNFCInstance(errorCode);
560    case UNORM_NFKC:
561        return getNFKCInstance(errorCode);
562    case UNORM_FCD:
563        return getFCDInstance(errorCode);
564    default:  // UNORM_NONE
565        return getNoopInstance(errorCode);
566    }
567}
568
569const Normalizer2Impl *
570Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
571    umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
572    return nfcSingleton!=NULL ? &nfcSingleton->impl : NULL;
573}
574
575const Normalizer2Impl *
576Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
577    umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
578    return nfkcSingleton!=NULL ? &nfkcSingleton->impl : NULL;
579}
580
581const Normalizer2Impl *
582Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
583    umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
584    return nfkc_cfSingleton!=NULL ? &nfkc_cfSingleton->impl : NULL;
585}
586
587const Normalizer2Impl *
588Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
589    return &((Normalizer2WithImpl *)norm2)->impl;
590}
591
592const Normalizer2 *
593Normalizer2::getNFCInstance(UErrorCode &errorCode) {
594    return Normalizer2Factory::getNFCInstance(errorCode);
595}
596
597const Normalizer2 *
598Normalizer2::getNFDInstance(UErrorCode &errorCode) {
599    return Normalizer2Factory::getNFDInstance(errorCode);
600}
601
602const Normalizer2 *
603Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
604    return Normalizer2Factory::getNFKCInstance(errorCode);
605}
606
607const Normalizer2 *
608Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
609    return Normalizer2Factory::getNFKDInstance(errorCode);
610}
611
612const Normalizer2 *
613Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
614    return Normalizer2Factory::getNFKC_CFInstance(errorCode);
615}
616
// Returns the Normalizer2 for the given data (packageName, name) and mode.
//
// - Returns NULL if errorCode indicates a failure on entry.
// - Sets U_ILLEGAL_ARGUMENT_ERROR and returns NULL if name is NULL or empty.
// - With packageName==NULL, the names "nfc", "nfkc" and "nfkc_cf" resolve to
//   the built-in singletons.
// - Anything else is looked up in, or created and added to, the file-level
//   cache. The cache is keyed by name only; packageName is used for loading
//   but is not part of the key.
// - Returns NULL without setting an error if mode is not one of the four
//   handled UNormalization2Mode values.
//
// The returned object lives inside a singleton or a cache entry;
// nothing in this function transfers ownership to the caller.
const Normalizer2 *
Normalizer2::getInstance(const char *packageName,
                         const char *name,
                         UNormalization2Mode mode,
                         UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return NULL;
    }
    if(name==NULL || *name==0) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }
    Norm2AllModes *allModes=NULL;
    if(packageName==NULL) {
        // Built-in data: use (and lazily create) the matching singleton.
        if(0==uprv_strcmp(name, "nfc")) {
            umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
            allModes=nfcSingleton;
        } else if(0==uprv_strcmp(name, "nfkc")) {
            umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
            allModes=nfkcSingleton;
        } else if(0==uprv_strcmp(name, "nfkc_cf")) {
            umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
            allModes=nfkc_cfSingleton;
        }
    }
    if(allModes==NULL && U_SUCCESS(errorCode)) {
        // Not a built-in singleton: first try the cache, holding the lock only for the lookup.
        {
            Mutex lock;
            if(cache!=NULL) {
                allModes=(Norm2AllModes *)uhash_get(cache, name);
            }
        }
        if(allModes==NULL) {
            // Cache miss: load the data without holding the lock.
            // The LocalPointer deletes the new instance on every path that does not orphan() it.
            LocalPointer<Norm2AllModes> localAllModes(
                Norm2AllModes::createInstance(packageName, name, errorCode));
            if(U_SUCCESS(errorCode)) {
                Mutex lock;
                if(cache==NULL) {
                    // First cached instance: create the table. It owns its keys and values.
                    // NOTE(review): no cleanup registration happens on this path; the cleanup
                    // function is only registered in initSingletons() - confirm that is intended.
                    cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
                    if(U_FAILURE(errorCode)) {
                        return NULL;
                    }
                    uhash_setKeyDeleter(cache, uprv_free);
                    uhash_setValueDeleter(cache, deleteNorm2AllModes);
                }
                // Look again under the lock: the entry may have been added since the first lookup.
                void *temp=uhash_get(cache, name);
                if(temp==NULL) {
                    // Still absent: store the new instance under a heap copy of the name
                    // (including the terminating NUL).
                    int32_t keyLength=uprv_strlen(name)+1;
                    char *nameCopy=(char *)uprv_malloc(keyLength);
                    if(nameCopy==NULL) {
                        errorCode=U_MEMORY_ALLOCATION_ERROR;
                        return NULL;
                    }
                    uprv_memcpy(nameCopy, name, keyLength);
                    // Ownership of the instance passes from the LocalPointer to the cache.
                    uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode);
                } else {
                    // race condition
                    // Use the entry that is already cached; the LocalPointer discards ours.
                    allModes=(Norm2AllModes *)temp;
                }
            }
        }
    }
    if(allModes!=NULL && U_SUCCESS(errorCode)) {
        // Pick the member that corresponds to the requested mode.
        switch(mode) {
        case UNORM2_COMPOSE:
            return &allModes->comp;
        case UNORM2_DECOMPOSE:
            return &allModes->decomp;
        case UNORM2_FCD:
            return &allModes->fcd;
        case UNORM2_COMPOSE_CONTIGUOUS:
            return &allModes->fcc;
        default:
            break;  // do nothing
        }
    }
    return NULL;
}
695
696U_NAMESPACE_END
697
698// C API ------------------------------------------------------------------- ***
699
700U_NAMESPACE_USE
701
702U_CAPI const UNormalizer2 * U_EXPORT2
703unorm2_getNFCInstance(UErrorCode *pErrorCode) {
704    return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode);
705}
706
707U_CAPI const UNormalizer2 * U_EXPORT2
708unorm2_getNFDInstance(UErrorCode *pErrorCode) {
709    return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode);
710}
711
712U_CAPI const UNormalizer2 * U_EXPORT2
713unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
714    return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
715}
716
717U_CAPI const UNormalizer2 * U_EXPORT2
718unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
719    return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
720}
721
722U_CAPI const UNormalizer2 * U_EXPORT2
723unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
724    return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
725}
726
727U_CAPI const UNormalizer2 * U_EXPORT2
728unorm2_getInstance(const char *packageName,
729                   const char *name,
730                   UNormalization2Mode mode,
731                   UErrorCode *pErrorCode) {
732    return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
733}
734
735U_CAPI void U_EXPORT2
736unorm2_close(UNormalizer2 *norm2) {
737    delete (Normalizer2 *)norm2;
738}
739
740U_CAPI int32_t U_EXPORT2
741unorm2_normalize(const UNormalizer2 *norm2,
742                 const UChar *src, int32_t length,
743                 UChar *dest, int32_t capacity,
744                 UErrorCode *pErrorCode) {
745    if(U_FAILURE(*pErrorCode)) {
746        return 0;
747    }
748    if( (src==NULL ? length!=0 : length<-1) ||
749        (dest==NULL ? capacity!=0 : capacity<0) ||
750        (src==dest && src!=NULL)
751    ) {
752        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
753        return 0;
754    }
755    UnicodeString destString(dest, 0, capacity);
756    // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
757    if(length!=0) {
758        const Normalizer2 *n2=(const Normalizer2 *)norm2;
759        const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
760        if(n2wi!=NULL) {
761            // Avoid duplicate argument checking and support NUL-terminated src.
762            ReorderingBuffer buffer(n2wi->impl, destString);
763            if(buffer.init(length, *pErrorCode)) {
764                n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode);
765            }
766        } else {
767            UnicodeString srcString(length<0, src, length);
768            n2->normalize(srcString, destString, *pErrorCode);
769        }
770    }
771    return destString.extract(dest, capacity, *pErrorCode);
772}
773
// Shared implementation of unorm2_normalizeSecondAndAppend() (doNormalize=TRUE)
// and unorm2_append() (doNormalize=FALSE).
//
// Appends second to the text in first[0..firstLength), working in place in
// the caller's array of firstCapacity UChars. A length of -1 for either
// string means NUL-terminated.
// Returns what UnicodeString::extract() reports for the combined string, or 0
// if *pErrorCode indicates a failure on entry or the arguments are invalid.
// Sets U_ILLEGAL_ARGUMENT_ERROR when
//   - second is NULL with a non-zero length, or secondLength is less than -1,
//   - first is NULL with a non-zero length or capacity,
//   - first is non-NULL with a negative capacity or a length less than -1,
//   - first and second are the same non-NULL pointer.
static int32_t
normalizeSecondAndAppend(const UNormalizer2 *norm2,
                         UChar *first, int32_t firstLength, int32_t firstCapacity,
                         const UChar *second, int32_t secondLength,
                         UBool doNormalize,
                         UErrorCode *pErrorCode) {
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if( (second==NULL ? secondLength!=0 : secondLength<-1) ||
        (first==NULL ? (firstCapacity!=0 || firstLength!=0) :
                       (firstCapacity<0 || firstLength<-1)) ||
        (first==second && first!=NULL)
    ) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    // Wrap the caller's array so the append can work on it directly.
    UnicodeString firstString(first, firstLength, firstCapacity);
    firstLength=firstString.length();  // In case it was -1.
    // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
    if(secondLength!=0) {
        const Normalizer2 *n2=(const Normalizer2 *)norm2;
        const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
        if(n2wi!=NULL) {
            // Avoid duplicate argument checking and support NUL-terminated src.
            // safeMiddle is filled in by normalizeAndAppend() and used below to undo
            // changes to the end of first.
            UnicodeString safeMiddle;
            {
                ReorderingBuffer buffer(n2wi->impl, firstString);
                if(buffer.init(firstLength+secondLength+1, *pErrorCode)) {  // destCapacity>=-1
                    n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
                                             doNormalize, safeMiddle, buffer, *pErrorCode);
                }
            }  // The ReorderingBuffer destructor finalizes firstString.
            // Undo on error, and also when the result no longer fits into the caller's array.
            if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) {
                // Restore the modified suffix of the first string.
                // This does not restore first[] array contents between firstLength and firstCapacity.
                // (That might be uninitialized memory, as far as we know.)
                if(first!=NULL) { /* don't dereference NULL */
                  safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length());
                  if(firstLength<firstCapacity) {
                    first[firstLength]=0;  // NUL-terminate in case it was originally.
                  }
                }
            }
        } else {
            // Some other Normalizer2 subclass: go through the UnicodeString API.
            UnicodeString secondString(secondLength<0, second, secondLength);
            if(doNormalize) {
                n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode);
            } else {
                n2->append(firstString, secondString, *pErrorCode);
            }
        }
    }
    return firstString.extract(first, firstCapacity, *pErrorCode);
}
829
830U_CAPI int32_t U_EXPORT2
831unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
832                                UChar *first, int32_t firstLength, int32_t firstCapacity,
833                                const UChar *second, int32_t secondLength,
834                                UErrorCode *pErrorCode) {
835    return normalizeSecondAndAppend(norm2,
836                                    first, firstLength, firstCapacity,
837                                    second, secondLength,
838                                    TRUE, pErrorCode);
839}
840
841U_CAPI int32_t U_EXPORT2
842unorm2_append(const UNormalizer2 *norm2,
843              UChar *first, int32_t firstLength, int32_t firstCapacity,
844              const UChar *second, int32_t secondLength,
845              UErrorCode *pErrorCode) {
846    return normalizeSecondAndAppend(norm2,
847                                    first, firstLength, firstCapacity,
848                                    second, secondLength,
849                                    FALSE, pErrorCode);
850}
851
852U_CAPI int32_t U_EXPORT2
853unorm2_getDecomposition(const UNormalizer2 *norm2,
854                        UChar32 c, UChar *decomposition, int32_t capacity,
855                        UErrorCode *pErrorCode) {
856    if(U_FAILURE(*pErrorCode)) {
857        return 0;
858    }
859    if(decomposition==NULL ? capacity!=0 : capacity<0) {
860        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
861        return 0;
862    }
863    UnicodeString destString(decomposition, 0, capacity);
864    if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) {
865        return destString.extract(decomposition, capacity, *pErrorCode);
866    } else {
867        return -1;
868    }
869}
870
871U_CAPI int32_t U_EXPORT2
872unorm2_getRawDecomposition(const UNormalizer2 *norm2,
873                           UChar32 c, UChar *decomposition, int32_t capacity,
874                           UErrorCode *pErrorCode) {
875    if(U_FAILURE(*pErrorCode)) {
876        return 0;
877    }
878    if(decomposition==NULL ? capacity!=0 : capacity<0) {
879        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
880        return 0;
881    }
882    UnicodeString destString(decomposition, 0, capacity);
883    if(reinterpret_cast<const Normalizer2 *>(norm2)->getRawDecomposition(c, destString)) {
884        return destString.extract(decomposition, capacity, *pErrorCode);
885    } else {
886        return -1;
887    }
888}
889
890U_CAPI UChar32 U_EXPORT2
891unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) {
892    return reinterpret_cast<const Normalizer2 *>(norm2)->composePair(a, b);
893}
894
895U_CAPI uint8_t U_EXPORT2
896unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) {
897    return reinterpret_cast<const Normalizer2 *>(norm2)->getCombiningClass(c);
898}
899
900U_CAPI UBool U_EXPORT2
901unorm2_isNormalized(const UNormalizer2 *norm2,
902                    const UChar *s, int32_t length,
903                    UErrorCode *pErrorCode) {
904    if(U_FAILURE(*pErrorCode)) {
905        return 0;
906    }
907    if((s==NULL && length!=0) || length<-1) {
908        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
909        return 0;
910    }
911    UnicodeString sString(length<0, s, length);
912    return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode);
913}
914
915U_CAPI UNormalizationCheckResult U_EXPORT2
916unorm2_quickCheck(const UNormalizer2 *norm2,
917                  const UChar *s, int32_t length,
918                  UErrorCode *pErrorCode) {
919    if(U_FAILURE(*pErrorCode)) {
920        return UNORM_NO;
921    }
922    if((s==NULL && length!=0) || length<-1) {
923        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
924        return UNORM_NO;
925    }
926    UnicodeString sString(length<0, s, length);
927    return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode);
928}
929
930U_CAPI int32_t U_EXPORT2
931unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
932                         const UChar *s, int32_t length,
933                         UErrorCode *pErrorCode) {
934    if(U_FAILURE(*pErrorCode)) {
935        return 0;
936    }
937    if((s==NULL && length!=0) || length<-1) {
938        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
939        return 0;
940    }
941    UnicodeString sString(length<0, s, length);
942    return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode);
943}
944
945U_CAPI UBool U_EXPORT2
946unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) {
947    return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c);
948}
949
950U_CAPI UBool U_EXPORT2
951unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) {
952    return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c);
953}
954
955U_CAPI UBool U_EXPORT2
956unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
957    return ((const Normalizer2 *)norm2)->isInert(c);
958}
959
960// Some properties APIs ---------------------------------------------------- ***
961
962U_CAPI uint8_t U_EXPORT2
963u_getCombiningClass(UChar32 c) {
964    UErrorCode errorCode=U_ZERO_ERROR;
965    const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode);
966    if(U_SUCCESS(errorCode)) {
967        return nfd->getCombiningClass(c);
968    } else {
969        return 0;
970    }
971}
972
973U_CFUNC UNormalizationCheckResult
974unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
975    if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
976        return UNORM_YES;
977    }
978    UErrorCode errorCode=U_ZERO_ERROR;
979    const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
980    if(U_SUCCESS(errorCode)) {
981        return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
982    } else {
983        return UNORM_MAYBE;
984    }
985}
986
987U_CFUNC uint16_t
988unorm_getFCD16(UChar32 c) {
989    UErrorCode errorCode=U_ZERO_ERROR;
990    const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
991    if(U_SUCCESS(errorCode)) {
992        return impl->getFCD16(c);
993    } else {
994        return 0;
995    }
996}
997
998#endif  // !UCONFIG_NO_NORMALIZATION
999