1/*
2*******************************************************************************
3*
4*   Copyright (C) 2009-2012, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  normalizer2.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2009nov22
14*   created by: Markus W. Scherer
15*/
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_NORMALIZATION
20
21#include "unicode/localpointer.h"
22#include "unicode/normalizer2.h"
23#include "unicode/unistr.h"
24#include "unicode/unorm.h"
25#include "cpputils.h"
26#include "cstring.h"
27#include "mutex.h"
28#include "normalizer2impl.h"
29#include "ucln_cmn.h"
30#include "uhash.h"
31
32U_NAMESPACE_BEGIN
33
34// Public API dispatch via Normalizer2 subclasses -------------------------- ***
35
36Normalizer2::~Normalizer2() {}
37
38UBool
39Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const {
40    return FALSE;
41}
42
43UChar32
44Normalizer2::composePair(UChar32, UChar32) const {
45    return U_SENTINEL;
46}
47
48uint8_t
49Normalizer2::getCombiningClass(UChar32 /*c*/) const {
50    return 0;
51}
52
// NOTE(review): presumably expands to the UObject boilerplate for Normalizer2
// without RTTI class IDs -- confirm against the macro's definition.
UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(Normalizer2)
54
55// Normalizer2 implementation for the old UNORM_NONE.
56class NoopNormalizer2 : public Normalizer2 {
57    virtual ~NoopNormalizer2();
58
59    virtual UnicodeString &
60    normalize(const UnicodeString &src,
61              UnicodeString &dest,
62              UErrorCode &errorCode) const {
63        if(U_SUCCESS(errorCode)) {
64            if(&dest!=&src) {
65                dest=src;
66            } else {
67                errorCode=U_ILLEGAL_ARGUMENT_ERROR;
68            }
69        }
70        return dest;
71    }
72    virtual UnicodeString &
73    normalizeSecondAndAppend(UnicodeString &first,
74                             const UnicodeString &second,
75                             UErrorCode &errorCode) const {
76        if(U_SUCCESS(errorCode)) {
77            if(&first!=&second) {
78                first.append(second);
79            } else {
80                errorCode=U_ILLEGAL_ARGUMENT_ERROR;
81            }
82        }
83        return first;
84    }
85    virtual UnicodeString &
86    append(UnicodeString &first,
87           const UnicodeString &second,
88           UErrorCode &errorCode) const {
89        if(U_SUCCESS(errorCode)) {
90            if(&first!=&second) {
91                first.append(second);
92            } else {
93                errorCode=U_ILLEGAL_ARGUMENT_ERROR;
94            }
95        }
96        return first;
97    }
98    virtual UBool
99    getDecomposition(UChar32, UnicodeString &) const {
100        return FALSE;
101    }
102    // No need to override the default getRawDecomposition().
103    virtual UBool
104    isNormalized(const UnicodeString &, UErrorCode &) const {
105        return TRUE;
106    }
107    virtual UNormalizationCheckResult
108    quickCheck(const UnicodeString &, UErrorCode &) const {
109        return UNORM_YES;
110    }
111    virtual int32_t
112    spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const {
113        return s.length();
114    }
115    virtual UBool hasBoundaryBefore(UChar32) const { return TRUE; }
116    virtual UBool hasBoundaryAfter(UChar32) const { return TRUE; }
117    virtual UBool isInert(UChar32) const { return TRUE; }
118};
119
120NoopNormalizer2::~NoopNormalizer2() {}
121
122// Intermediate class:
123// Has Normalizer2Impl and does boilerplate argument checking and setup.
124class Normalizer2WithImpl : public Normalizer2 {
125public:
126    Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
127    virtual ~Normalizer2WithImpl();
128
129    // normalize
130    virtual UnicodeString &
131    normalize(const UnicodeString &src,
132              UnicodeString &dest,
133              UErrorCode &errorCode) const {
134        if(U_FAILURE(errorCode)) {
135            dest.setToBogus();
136            return dest;
137        }
138        const UChar *sArray=src.getBuffer();
139        if(&dest==&src || sArray==NULL) {
140            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
141            dest.setToBogus();
142            return dest;
143        }
144        dest.remove();
145        ReorderingBuffer buffer(impl, dest);
146        if(buffer.init(src.length(), errorCode)) {
147            normalize(sArray, sArray+src.length(), buffer, errorCode);
148        }
149        return dest;
150    }
151    virtual void
152    normalize(const UChar *src, const UChar *limit,
153              ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
154
155    // normalize and append
156    virtual UnicodeString &
157    normalizeSecondAndAppend(UnicodeString &first,
158                             const UnicodeString &second,
159                             UErrorCode &errorCode) const {
160        return normalizeSecondAndAppend(first, second, TRUE, errorCode);
161    }
162    virtual UnicodeString &
163    append(UnicodeString &first,
164           const UnicodeString &second,
165           UErrorCode &errorCode) const {
166        return normalizeSecondAndAppend(first, second, FALSE, errorCode);
167    }
168    UnicodeString &
169    normalizeSecondAndAppend(UnicodeString &first,
170                             const UnicodeString &second,
171                             UBool doNormalize,
172                             UErrorCode &errorCode) const {
173        uprv_checkCanGetBuffer(first, errorCode);
174        if(U_FAILURE(errorCode)) {
175            return first;
176        }
177        const UChar *secondArray=second.getBuffer();
178        if(&first==&second || secondArray==NULL) {
179            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
180            return first;
181        }
182        int32_t firstLength=first.length();
183        UnicodeString safeMiddle;
184        {
185            ReorderingBuffer buffer(impl, first);
186            if(buffer.init(firstLength+second.length(), errorCode)) {
187                normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
188                                   safeMiddle, buffer, errorCode);
189            }
190        }  // The ReorderingBuffer destructor finalizes the first string.
191        if(U_FAILURE(errorCode)) {
192            // Restore the modified suffix of the first string.
193            first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
194        }
195        return first;
196    }
197    virtual void
198    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
199                       UnicodeString &safeMiddle,
200                       ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
201    virtual UBool
202    getDecomposition(UChar32 c, UnicodeString &decomposition) const {
203        UChar buffer[4];
204        int32_t length;
205        const UChar *d=impl.getDecomposition(c, buffer, length);
206        if(d==NULL) {
207            return FALSE;
208        }
209        if(d==buffer) {
210            decomposition.setTo(buffer, length);  // copy the string (Jamos from Hangul syllable c)
211        } else {
212            decomposition.setTo(FALSE, d, length);  // read-only alias
213        }
214        return TRUE;
215    }
216    virtual UBool
217    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
218        UChar buffer[30];
219        int32_t length;
220        const UChar *d=impl.getRawDecomposition(c, buffer, length);
221        if(d==NULL) {
222            return FALSE;
223        }
224        if(d==buffer) {
225            decomposition.setTo(buffer, length);  // copy the string (algorithmic decomposition)
226        } else {
227            decomposition.setTo(FALSE, d, length);  // read-only alias
228        }
229        return TRUE;
230    }
231    virtual UChar32
232    composePair(UChar32 a, UChar32 b) const {
233        return impl.composePair(a, b);
234    }
235
236    virtual uint8_t
237    getCombiningClass(UChar32 c) const {
238        return impl.getCC(impl.getNorm16(c));
239    }
240
241    // quick checks
242    virtual UBool
243    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
244        if(U_FAILURE(errorCode)) {
245            return FALSE;
246        }
247        const UChar *sArray=s.getBuffer();
248        if(sArray==NULL) {
249            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
250            return FALSE;
251        }
252        const UChar *sLimit=sArray+s.length();
253        return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
254    }
255    virtual UNormalizationCheckResult
256    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
257        return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
258    }
259    virtual int32_t
260    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
261        if(U_FAILURE(errorCode)) {
262            return 0;
263        }
264        const UChar *sArray=s.getBuffer();
265        if(sArray==NULL) {
266            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
267            return 0;
268        }
269        return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
270    }
271    virtual const UChar *
272    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
273
274    virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
275        return UNORM_YES;
276    }
277
278    const Normalizer2Impl &impl;
279};
280
281Normalizer2WithImpl::~Normalizer2WithImpl() {}
282
283class DecomposeNormalizer2 : public Normalizer2WithImpl {
284public:
285    DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
286    virtual ~DecomposeNormalizer2();
287
288private:
289    virtual void
290    normalize(const UChar *src, const UChar *limit,
291              ReorderingBuffer &buffer, UErrorCode &errorCode) const {
292        impl.decompose(src, limit, &buffer, errorCode);
293    }
294    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
295    virtual void
296    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
297                       UnicodeString &safeMiddle,
298                       ReorderingBuffer &buffer, UErrorCode &errorCode) const {
299        impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
300    }
301    virtual const UChar *
302    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
303        return impl.decompose(src, limit, NULL, errorCode);
304    }
305    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
306    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
307        return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
308    }
309    virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
310    virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
311    virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
312};
313
314DecomposeNormalizer2::~DecomposeNormalizer2() {}
315
316class ComposeNormalizer2 : public Normalizer2WithImpl {
317public:
318    ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
319        Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
320    virtual ~ComposeNormalizer2();
321
322private:
323    virtual void
324    normalize(const UChar *src, const UChar *limit,
325              ReorderingBuffer &buffer, UErrorCode &errorCode) const {
326        impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
327    }
328    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
329    virtual void
330    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
331                       UnicodeString &safeMiddle,
332                       ReorderingBuffer &buffer, UErrorCode &errorCode) const {
333        impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
334    }
335
336    virtual UBool
337    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
338        if(U_FAILURE(errorCode)) {
339            return FALSE;
340        }
341        const UChar *sArray=s.getBuffer();
342        if(sArray==NULL) {
343            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
344            return FALSE;
345        }
346        UnicodeString temp;
347        ReorderingBuffer buffer(impl, temp);
348        if(!buffer.init(5, errorCode)) {  // small destCapacity for substring normalization
349            return FALSE;
350        }
351        return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
352    }
353    virtual UNormalizationCheckResult
354    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
355        if(U_FAILURE(errorCode)) {
356            return UNORM_MAYBE;
357        }
358        const UChar *sArray=s.getBuffer();
359        if(sArray==NULL) {
360            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
361            return UNORM_MAYBE;
362        }
363        UNormalizationCheckResult qcResult=UNORM_YES;
364        impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
365        return qcResult;
366    }
367    virtual const UChar *
368    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
369        return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
370    }
371    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
372    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
373        return impl.getCompQuickCheck(impl.getNorm16(c));
374    }
375    virtual UBool hasBoundaryBefore(UChar32 c) const {
376        return impl.hasCompBoundaryBefore(c);
377    }
378    virtual UBool hasBoundaryAfter(UChar32 c) const {
379        return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
380    }
381    virtual UBool isInert(UChar32 c) const {
382        return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
383    }
384
385    const UBool onlyContiguous;
386};
387
388ComposeNormalizer2::~ComposeNormalizer2() {}
389
390class FCDNormalizer2 : public Normalizer2WithImpl {
391public:
392    FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
393    virtual ~FCDNormalizer2();
394
395private:
396    virtual void
397    normalize(const UChar *src, const UChar *limit,
398              ReorderingBuffer &buffer, UErrorCode &errorCode) const {
399        impl.makeFCD(src, limit, &buffer, errorCode);
400    }
401    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
402    virtual void
403    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
404                       UnicodeString &safeMiddle,
405                       ReorderingBuffer &buffer, UErrorCode &errorCode) const {
406        impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
407    }
408    virtual const UChar *
409    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
410        return impl.makeFCD(src, limit, NULL, errorCode);
411    }
412    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
413    virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
414    virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
415    virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
416};
417
418FCDNormalizer2::~FCDNormalizer2() {}
419
420// instance cache ---------------------------------------------------------- ***
421
422struct Norm2AllModes : public UMemory {
423    static Norm2AllModes *createInstance(const char *packageName,
424                                         const char *name,
425                                         UErrorCode &errorCode);
426    Norm2AllModes() : comp(impl, FALSE), decomp(impl), fcd(impl), fcc(impl, TRUE) {}
427
428    Normalizer2Impl impl;
429    ComposeNormalizer2 comp;
430    DecomposeNormalizer2 decomp;
431    FCDNormalizer2 fcd;
432    ComposeNormalizer2 fcc;
433};
434
435Norm2AllModes *
436Norm2AllModes::createInstance(const char *packageName,
437                              const char *name,
438                              UErrorCode &errorCode) {
439    if(U_FAILURE(errorCode)) {
440        return NULL;
441    }
442    LocalPointer<Norm2AllModes> allModes(new Norm2AllModes);
443    if(allModes.isNull()) {
444        errorCode=U_MEMORY_ALLOCATION_ERROR;
445        return NULL;
446    }
447    allModes->impl.load(packageName, name, errorCode);
448    return U_SUCCESS(errorCode) ? allModes.orphan() : NULL;
449}
450
// Forward declaration: the singleton wrappers below pass this cleanup function
// to ucln_common_registerCleanup() before its definition appears.
U_CDECL_BEGIN
static UBool U_CALLCONV uprv_normalizer2_cleanup();
U_CDECL_END
454
455class Norm2AllModesSingleton : public TriStateSingletonWrapper<Norm2AllModes> {
456public:
457    Norm2AllModesSingleton(TriStateSingleton &s, const char *n) :
458        TriStateSingletonWrapper<Norm2AllModes>(s), name(n) {}
459    Norm2AllModes *getInstance(UErrorCode &errorCode) {
460        return TriStateSingletonWrapper<Norm2AllModes>::getInstance(createInstance, name, errorCode);
461    }
462private:
463    static void *createInstance(const void *context, UErrorCode &errorCode) {
464        ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
465        return Norm2AllModes::createInstance(NULL, (const char *)context, errorCode);
466    }
467
468    const char *name;
469};
470
// Singleton state objects, one per data name used with Norm2AllModesSingleton
// in this file: "nfc", "nfkc" and "nfkc_cf".
STATIC_TRI_STATE_SINGLETON(nfcSingleton);
STATIC_TRI_STATE_SINGLETON(nfkcSingleton);
STATIC_TRI_STATE_SINGLETON(nfkc_cfSingleton);
474
475class Norm2Singleton : public SimpleSingletonWrapper<Normalizer2> {
476public:
477    Norm2Singleton(SimpleSingleton &s) : SimpleSingletonWrapper<Normalizer2>(s) {}
478    Normalizer2 *getInstance(UErrorCode &errorCode) {
479        return SimpleSingletonWrapper<Normalizer2>::getInstance(createInstance, NULL, errorCode);
480    }
481private:
482    static void *createInstance(const void *, UErrorCode &errorCode) {
483        Normalizer2 *noop=new NoopNormalizer2;
484        if(noop==NULL) {
485            errorCode=U_MEMORY_ALLOCATION_ERROR;
486        }
487        ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup);
488        return noop;
489    }
490};
491
// Singleton state for the no-op normalizer handed out by getNoopInstance().
STATIC_SIMPLE_SINGLETON(noopSingleton);

// Name-keyed cache of Norm2AllModes objects loaded through
// Normalizer2::getInstance(). Created lazily there and closed in
// uprv_normalizer2_cleanup(). Accessed under a Mutex in getInstance().
static UHashtable *cache=NULL;
495
496U_CDECL_BEGIN
497
498static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
499    delete (Norm2AllModes *)allModes;
500}
501
502static UBool U_CALLCONV uprv_normalizer2_cleanup() {
503    Norm2AllModesSingleton(nfcSingleton, NULL).deleteInstance();
504    Norm2AllModesSingleton(nfkcSingleton, NULL).deleteInstance();
505    Norm2AllModesSingleton(nfkc_cfSingleton, NULL).deleteInstance();
506    Norm2Singleton(noopSingleton).deleteInstance();
507    uhash_close(cache);
508    cache=NULL;
509    return TRUE;
510}
511
512U_CDECL_END
513
514const Normalizer2 *Normalizer2Factory::getNFCInstance(UErrorCode &errorCode) {
515    Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
516    return allModes!=NULL ? &allModes->comp : NULL;
517}
518
519const Normalizer2 *Normalizer2Factory::getNFDInstance(UErrorCode &errorCode) {
520    Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
521    return allModes!=NULL ? &allModes->decomp : NULL;
522}
523
524const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
525    Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
526    return allModes!=NULL ? &allModes->fcd : NULL;
527}
528
529const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
530    Norm2AllModes *allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
531    return allModes!=NULL ? &allModes->fcc : NULL;
532}
533
534const Normalizer2 *Normalizer2Factory::getNFKCInstance(UErrorCode &errorCode) {
535    Norm2AllModes *allModes=
536        Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
537    return allModes!=NULL ? &allModes->comp : NULL;
538}
539
540const Normalizer2 *Normalizer2Factory::getNFKDInstance(UErrorCode &errorCode) {
541    Norm2AllModes *allModes=
542        Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
543    return allModes!=NULL ? &allModes->decomp : NULL;
544}
545
546const Normalizer2 *Normalizer2Factory::getNFKC_CFInstance(UErrorCode &errorCode) {
547    Norm2AllModes *allModes=
548        Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
549    return allModes!=NULL ? &allModes->comp : NULL;
550}
551
552const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) {
553    return Norm2Singleton(noopSingleton).getInstance(errorCode);
554}
555
556const Normalizer2 *
557Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
558    if(U_FAILURE(errorCode)) {
559        return NULL;
560    }
561    switch(mode) {
562    case UNORM_NFD:
563        return getNFDInstance(errorCode);
564    case UNORM_NFKD:
565        return getNFKDInstance(errorCode);
566    case UNORM_NFC:
567        return getNFCInstance(errorCode);
568    case UNORM_NFKC:
569        return getNFKCInstance(errorCode);
570    case UNORM_FCD:
571        return getFCDInstance(errorCode);
572    default:  // UNORM_NONE
573        return getNoopInstance(errorCode);
574    }
575}
576
577const Normalizer2Impl *
578Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
579    Norm2AllModes *allModes=
580        Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
581    return allModes!=NULL ? &allModes->impl : NULL;
582}
583
584const Normalizer2Impl *
585Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
586    Norm2AllModes *allModes=
587        Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
588    return allModes!=NULL ? &allModes->impl : NULL;
589}
590
591const Normalizer2Impl *
592Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
593    Norm2AllModes *allModes=
594        Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
595    return allModes!=NULL ? &allModes->impl : NULL;
596}
597
598const Normalizer2Impl *
599Normalizer2Factory::getImpl(const Normalizer2 *norm2) {
600    return &((Normalizer2WithImpl *)norm2)->impl;
601}
602
603const Normalizer2 *
604Normalizer2::getNFCInstance(UErrorCode &errorCode) {
605    return Normalizer2Factory::getNFCInstance(errorCode);
606}
607
608const Normalizer2 *
609Normalizer2::getNFDInstance(UErrorCode &errorCode) {
610    return Normalizer2Factory::getNFDInstance(errorCode);
611}
612
613const Normalizer2 *
614Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
615    return Normalizer2Factory::getNFKCInstance(errorCode);
616}
617
618const Normalizer2 *
619Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
620    return Normalizer2Factory::getNFKDInstance(errorCode);
621}
622
623const Normalizer2 *
624Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
625    return Normalizer2Factory::getNFKC_CFInstance(errorCode);
626}
627
628const Normalizer2 *
629Normalizer2::getInstance(const char *packageName,
630                         const char *name,
631                         UNormalization2Mode mode,
632                         UErrorCode &errorCode) {
633    if(U_FAILURE(errorCode)) {
634        return NULL;
635    }
636    if(name==NULL || *name==0) {
637        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
638        return NULL;
639    }
640    Norm2AllModes *allModes=NULL;
641    if(packageName==NULL) {
642        if(0==uprv_strcmp(name, "nfc")) {
643            allModes=Norm2AllModesSingleton(nfcSingleton, "nfc").getInstance(errorCode);
644        } else if(0==uprv_strcmp(name, "nfkc")) {
645            allModes=Norm2AllModesSingleton(nfkcSingleton, "nfkc").getInstance(errorCode);
646        } else if(0==uprv_strcmp(name, "nfkc_cf")) {
647            allModes=Norm2AllModesSingleton(nfkc_cfSingleton, "nfkc_cf").getInstance(errorCode);
648        }
649    }
650    if(allModes==NULL && U_SUCCESS(errorCode)) {
651        {
652            Mutex lock;
653            if(cache!=NULL) {
654                allModes=(Norm2AllModes *)uhash_get(cache, name);
655            }
656        }
657        if(allModes==NULL) {
658            LocalPointer<Norm2AllModes> localAllModes(
659                Norm2AllModes::createInstance(packageName, name, errorCode));
660            if(U_SUCCESS(errorCode)) {
661                Mutex lock;
662                if(cache==NULL) {
663                    cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
664                    if(U_FAILURE(errorCode)) {
665                        return NULL;
666                    }
667                    uhash_setKeyDeleter(cache, uprv_free);
668                    uhash_setValueDeleter(cache, deleteNorm2AllModes);
669                }
670                void *temp=uhash_get(cache, name);
671                if(temp==NULL) {
672                    int32_t keyLength=uprv_strlen(name)+1;
673                    char *nameCopy=(char *)uprv_malloc(keyLength);
674                    if(nameCopy==NULL) {
675                        errorCode=U_MEMORY_ALLOCATION_ERROR;
676                        return NULL;
677                    }
678                    uprv_memcpy(nameCopy, name, keyLength);
679                    uhash_put(cache, nameCopy, allModes=localAllModes.orphan(), &errorCode);
680                } else {
681                    // race condition
682                    allModes=(Norm2AllModes *)temp;
683                }
684            }
685        }
686    }
687    if(allModes!=NULL && U_SUCCESS(errorCode)) {
688        switch(mode) {
689        case UNORM2_COMPOSE:
690            return &allModes->comp;
691        case UNORM2_DECOMPOSE:
692            return &allModes->decomp;
693        case UNORM2_FCD:
694            return &allModes->fcd;
695        case UNORM2_COMPOSE_CONTIGUOUS:
696            return &allModes->fcc;
697        default:
698            break;  // do nothing
699        }
700    }
701    return NULL;
702}
703
704U_NAMESPACE_END
705
706// C API ------------------------------------------------------------------- ***
707
708U_NAMESPACE_USE
709
710U_CAPI const UNormalizer2 * U_EXPORT2
711unorm2_getNFCInstance(UErrorCode *pErrorCode) {
712    return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode);
713}
714
715U_CAPI const UNormalizer2 * U_EXPORT2
716unorm2_getNFDInstance(UErrorCode *pErrorCode) {
717    return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode);
718}
719
720U_CAPI const UNormalizer2 * U_EXPORT2
721unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
722    return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
723}
724
725U_CAPI const UNormalizer2 * U_EXPORT2
726unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
727    return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
728}
729
730U_CAPI const UNormalizer2 * U_EXPORT2
731unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
732    return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
733}
734
735U_CAPI const UNormalizer2 * U_EXPORT2
736unorm2_getInstance(const char *packageName,
737                   const char *name,
738                   UNormalization2Mode mode,
739                   UErrorCode *pErrorCode) {
740    return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
741}
742
743U_CAPI void U_EXPORT2
744unorm2_close(UNormalizer2 *norm2) {
745    delete (Normalizer2 *)norm2;
746}
747
748U_CAPI int32_t U_EXPORT2
749unorm2_normalize(const UNormalizer2 *norm2,
750                 const UChar *src, int32_t length,
751                 UChar *dest, int32_t capacity,
752                 UErrorCode *pErrorCode) {
753    if(U_FAILURE(*pErrorCode)) {
754        return 0;
755    }
756    if( (src==NULL ? length!=0 : length<-1) ||
757        (dest==NULL ? capacity!=0 : capacity<0) ||
758        (src==dest && src!=NULL)
759    ) {
760        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
761        return 0;
762    }
763    UnicodeString destString(dest, 0, capacity);
764    // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash.
765    if(length!=0) {
766        const Normalizer2 *n2=(const Normalizer2 *)norm2;
767        const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
768        if(n2wi!=NULL) {
769            // Avoid duplicate argument checking and support NUL-terminated src.
770            ReorderingBuffer buffer(n2wi->impl, destString);
771            if(buffer.init(length, *pErrorCode)) {
772                n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode);
773            }
774        } else {
775            UnicodeString srcString(length<0, src, length);
776            n2->normalize(srcString, destString, *pErrorCode);
777        }
778    }
779    return destString.extract(dest, capacity, *pErrorCode);
780}
781
782static int32_t
783normalizeSecondAndAppend(const UNormalizer2 *norm2,
784                         UChar *first, int32_t firstLength, int32_t firstCapacity,
785                         const UChar *second, int32_t secondLength,
786                         UBool doNormalize,
787                         UErrorCode *pErrorCode) {
788    if(U_FAILURE(*pErrorCode)) {
789        return 0;
790    }
791    if( (second==NULL ? secondLength!=0 : secondLength<-1) ||
792        (first==NULL ? (firstCapacity!=0 || firstLength!=0) :
793                       (firstCapacity<0 || firstLength<-1)) ||
794        (first==second && first!=NULL)
795    ) {
796        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
797        return 0;
798    }
799    UnicodeString firstString(first, firstLength, firstCapacity);
800    firstLength=firstString.length();  // In case it was -1.
801    // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash.
802    if(secondLength!=0) {
803        const Normalizer2 *n2=(const Normalizer2 *)norm2;
804        const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2);
805        if(n2wi!=NULL) {
806            // Avoid duplicate argument checking and support NUL-terminated src.
807            UnicodeString safeMiddle;
808            {
809                ReorderingBuffer buffer(n2wi->impl, firstString);
810                if(buffer.init(firstLength+secondLength+1, *pErrorCode)) {  // destCapacity>=-1
811                    n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL,
812                                             doNormalize, safeMiddle, buffer, *pErrorCode);
813                }
814            }  // The ReorderingBuffer destructor finalizes firstString.
815            if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) {
816                // Restore the modified suffix of the first string.
817                // This does not restore first[] array contents between firstLength and firstCapacity.
818                // (That might be uninitialized memory, as far as we know.)
819                if(first!=NULL) { /* don't dereference NULL */
820                  safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length());
821                  if(firstLength<firstCapacity) {
822                    first[firstLength]=0;  // NUL-terminate in case it was originally.
823                  }
824                }
825            }
826        } else {
827            UnicodeString secondString(secondLength<0, second, secondLength);
828            if(doNormalize) {
829                n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode);
830            } else {
831                n2->append(firstString, secondString, *pErrorCode);
832            }
833        }
834    }
835    return firstString.extract(first, firstCapacity, *pErrorCode);
836}
837
838U_CAPI int32_t U_EXPORT2
839unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
840                                UChar *first, int32_t firstLength, int32_t firstCapacity,
841                                const UChar *second, int32_t secondLength,
842                                UErrorCode *pErrorCode) {
843    return normalizeSecondAndAppend(norm2,
844                                    first, firstLength, firstCapacity,
845                                    second, secondLength,
846                                    TRUE, pErrorCode);
847}
848
849U_CAPI int32_t U_EXPORT2
850unorm2_append(const UNormalizer2 *norm2,
851              UChar *first, int32_t firstLength, int32_t firstCapacity,
852              const UChar *second, int32_t secondLength,
853              UErrorCode *pErrorCode) {
854    return normalizeSecondAndAppend(norm2,
855                                    first, firstLength, firstCapacity,
856                                    second, secondLength,
857                                    FALSE, pErrorCode);
858}
859
860U_CAPI int32_t U_EXPORT2
861unorm2_getDecomposition(const UNormalizer2 *norm2,
862                        UChar32 c, UChar *decomposition, int32_t capacity,
863                        UErrorCode *pErrorCode) {
864    if(U_FAILURE(*pErrorCode)) {
865        return 0;
866    }
867    if(decomposition==NULL ? capacity!=0 : capacity<0) {
868        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
869        return 0;
870    }
871    UnicodeString destString(decomposition, 0, capacity);
872    if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) {
873        return destString.extract(decomposition, capacity, *pErrorCode);
874    } else {
875        return -1;
876    }
877}
878
879U_CAPI int32_t U_EXPORT2
880unorm2_getRawDecomposition(const UNormalizer2 *norm2,
881                           UChar32 c, UChar *decomposition, int32_t capacity,
882                           UErrorCode *pErrorCode) {
883    if(U_FAILURE(*pErrorCode)) {
884        return 0;
885    }
886    if(decomposition==NULL ? capacity!=0 : capacity<0) {
887        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
888        return 0;
889    }
890    UnicodeString destString(decomposition, 0, capacity);
891    if(reinterpret_cast<const Normalizer2 *>(norm2)->getRawDecomposition(c, destString)) {
892        return destString.extract(decomposition, capacity, *pErrorCode);
893    } else {
894        return -1;
895    }
896}
897
898U_CAPI UChar32 U_EXPORT2
899unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) {
900    return reinterpret_cast<const Normalizer2 *>(norm2)->composePair(a, b);
901}
902
903U_CAPI uint8_t U_EXPORT2
904unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) {
905    return reinterpret_cast<const Normalizer2 *>(norm2)->getCombiningClass(c);
906}
907
908U_CAPI UBool U_EXPORT2
909unorm2_isNormalized(const UNormalizer2 *norm2,
910                    const UChar *s, int32_t length,
911                    UErrorCode *pErrorCode) {
912    if(U_FAILURE(*pErrorCode)) {
913        return 0;
914    }
915    if((s==NULL && length!=0) || length<-1) {
916        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
917        return 0;
918    }
919    UnicodeString sString(length<0, s, length);
920    return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode);
921}
922
923U_CAPI UNormalizationCheckResult U_EXPORT2
924unorm2_quickCheck(const UNormalizer2 *norm2,
925                  const UChar *s, int32_t length,
926                  UErrorCode *pErrorCode) {
927    if(U_FAILURE(*pErrorCode)) {
928        return UNORM_NO;
929    }
930    if((s==NULL && length!=0) || length<-1) {
931        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
932        return UNORM_NO;
933    }
934    UnicodeString sString(length<0, s, length);
935    return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode);
936}
937
938U_CAPI int32_t U_EXPORT2
939unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
940                         const UChar *s, int32_t length,
941                         UErrorCode *pErrorCode) {
942    if(U_FAILURE(*pErrorCode)) {
943        return 0;
944    }
945    if((s==NULL && length!=0) || length<-1) {
946        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
947        return 0;
948    }
949    UnicodeString sString(length<0, s, length);
950    return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode);
951}
952
953U_CAPI UBool U_EXPORT2
954unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) {
955    return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c);
956}
957
958U_CAPI UBool U_EXPORT2
959unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) {
960    return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c);
961}
962
963U_CAPI UBool U_EXPORT2
964unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) {
965    return ((const Normalizer2 *)norm2)->isInert(c);
966}
967
968// Some properties APIs ---------------------------------------------------- ***
969
970U_CAPI uint8_t U_EXPORT2
971u_getCombiningClass(UChar32 c) {
972    UErrorCode errorCode=U_ZERO_ERROR;
973    const Normalizer2 *nfd=Normalizer2Factory::getNFDInstance(errorCode);
974    if(U_SUCCESS(errorCode)) {
975        return nfd->getCombiningClass(c);
976    } else {
977        return 0;
978    }
979}
980
981U_CFUNC UNormalizationCheckResult
982unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
983    if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
984        return UNORM_YES;
985    }
986    UErrorCode errorCode=U_ZERO_ERROR;
987    const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
988    if(U_SUCCESS(errorCode)) {
989        return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
990    } else {
991        return UNORM_MAYBE;
992    }
993}
994
995U_CFUNC uint16_t
996unorm_getFCD16(UChar32 c) {
997    UErrorCode errorCode=U_ZERO_ERROR;
998    const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode);
999    if(U_SUCCESS(errorCode)) {
1000        return impl->getFCD16(c);
1001    } else {
1002        return 0;
1003    }
1004}
1005
1006#endif  // !UCONFIG_NO_NORMALIZATION
1007