1// Copyright (C) 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5* Copyright (C) 2014, International Business Machines
6* Corporation and others.  All Rights Reserved.
7*******************************************************************************
* norm2allmodes.h
9*
10* created on: 2014sep07
11* created by: Markus W. Scherer
12*/
13
14#ifndef __NORM2ALLMODES_H__
15#define __NORM2ALLMODES_H__
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_NORMALIZATION
20
21#include "unicode/normalizer2.h"
22#include "unicode/unistr.h"
23#include "cpputils.h"
24#include "normalizer2impl.h"
25
26U_NAMESPACE_BEGIN
27
28// Intermediate class:
29// Has Normalizer2Impl and does boilerplate argument checking and setup.
30class Normalizer2WithImpl : public Normalizer2 {
31public:
32    Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
33    virtual ~Normalizer2WithImpl();
34
35    // normalize
36    virtual UnicodeString &
37    normalize(const UnicodeString &src,
38              UnicodeString &dest,
39              UErrorCode &errorCode) const {
40        if(U_FAILURE(errorCode)) {
41            dest.setToBogus();
42            return dest;
43        }
44        const UChar *sArray=src.getBuffer();
45        if(&dest==&src || sArray==NULL) {
46            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
47            dest.setToBogus();
48            return dest;
49        }
50        dest.remove();
51        ReorderingBuffer buffer(impl, dest);
52        if(buffer.init(src.length(), errorCode)) {
53            normalize(sArray, sArray+src.length(), buffer, errorCode);
54        }
55        return dest;
56    }
57    virtual void
58    normalize(const UChar *src, const UChar *limit,
59              ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
60
61    // normalize and append
62    virtual UnicodeString &
63    normalizeSecondAndAppend(UnicodeString &first,
64                             const UnicodeString &second,
65                             UErrorCode &errorCode) const {
66        return normalizeSecondAndAppend(first, second, TRUE, errorCode);
67    }
68    virtual UnicodeString &
69    append(UnicodeString &first,
70           const UnicodeString &second,
71           UErrorCode &errorCode) const {
72        return normalizeSecondAndAppend(first, second, FALSE, errorCode);
73    }
74    UnicodeString &
75    normalizeSecondAndAppend(UnicodeString &first,
76                             const UnicodeString &second,
77                             UBool doNormalize,
78                             UErrorCode &errorCode) const {
79        uprv_checkCanGetBuffer(first, errorCode);
80        if(U_FAILURE(errorCode)) {
81            return first;
82        }
83        const UChar *secondArray=second.getBuffer();
84        if(&first==&second || secondArray==NULL) {
85            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
86            return first;
87        }
88        int32_t firstLength=first.length();
89        UnicodeString safeMiddle;
90        {
91            ReorderingBuffer buffer(impl, first);
92            if(buffer.init(firstLength+second.length(), errorCode)) {
93                normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
94                                   safeMiddle, buffer, errorCode);
95            }
96        }  // The ReorderingBuffer destructor finalizes the first string.
97        if(U_FAILURE(errorCode)) {
98            // Restore the modified suffix of the first string.
99            first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
100        }
101        return first;
102    }
103    virtual void
104    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
105                       UnicodeString &safeMiddle,
106                       ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
107    virtual UBool
108    getDecomposition(UChar32 c, UnicodeString &decomposition) const {
109        UChar buffer[4];
110        int32_t length;
111        const UChar *d=impl.getDecomposition(c, buffer, length);
112        if(d==NULL) {
113            return FALSE;
114        }
115        if(d==buffer) {
116            decomposition.setTo(buffer, length);  // copy the string (Jamos from Hangul syllable c)
117        } else {
118            decomposition.setTo(FALSE, d, length);  // read-only alias
119        }
120        return TRUE;
121    }
122    virtual UBool
123    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
124        UChar buffer[30];
125        int32_t length;
126        const UChar *d=impl.getRawDecomposition(c, buffer, length);
127        if(d==NULL) {
128            return FALSE;
129        }
130        if(d==buffer) {
131            decomposition.setTo(buffer, length);  // copy the string (algorithmic decomposition)
132        } else {
133            decomposition.setTo(FALSE, d, length);  // read-only alias
134        }
135        return TRUE;
136    }
137    virtual UChar32
138    composePair(UChar32 a, UChar32 b) const {
139        return impl.composePair(a, b);
140    }
141
142    virtual uint8_t
143    getCombiningClass(UChar32 c) const {
144        return impl.getCC(impl.getNorm16(c));
145    }
146
147    // quick checks
148    virtual UBool
149    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
150        if(U_FAILURE(errorCode)) {
151            return FALSE;
152        }
153        const UChar *sArray=s.getBuffer();
154        if(sArray==NULL) {
155            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
156            return FALSE;
157        }
158        const UChar *sLimit=sArray+s.length();
159        return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
160    }
161    virtual UNormalizationCheckResult
162    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
163        return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
164    }
165    virtual int32_t
166    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
167        if(U_FAILURE(errorCode)) {
168            return 0;
169        }
170        const UChar *sArray=s.getBuffer();
171        if(sArray==NULL) {
172            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
173            return 0;
174        }
175        return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
176    }
177    virtual const UChar *
178    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
179
180    virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
181        return UNORM_YES;
182    }
183
184    const Normalizer2Impl &impl;
185};
186
187class DecomposeNormalizer2 : public Normalizer2WithImpl {
188public:
189    DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
190    virtual ~DecomposeNormalizer2();
191
192private:
193    virtual void
194    normalize(const UChar *src, const UChar *limit,
195              ReorderingBuffer &buffer, UErrorCode &errorCode) const {
196        impl.decompose(src, limit, &buffer, errorCode);
197    }
198    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
199    virtual void
200    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
201                       UnicodeString &safeMiddle,
202                       ReorderingBuffer &buffer, UErrorCode &errorCode) const {
203        impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
204    }
205    virtual const UChar *
206    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
207        return impl.decompose(src, limit, NULL, errorCode);
208    }
209    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
210    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
211        return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
212    }
213    virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
214    virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
215    virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
216};
217
218class ComposeNormalizer2 : public Normalizer2WithImpl {
219public:
220    ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
221        Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
222    virtual ~ComposeNormalizer2();
223
224private:
225    virtual void
226    normalize(const UChar *src, const UChar *limit,
227              ReorderingBuffer &buffer, UErrorCode &errorCode) const {
228        impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
229    }
230    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
231    virtual void
232    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
233                       UnicodeString &safeMiddle,
234                       ReorderingBuffer &buffer, UErrorCode &errorCode) const {
235        impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
236    }
237
238    virtual UBool
239    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
240        if(U_FAILURE(errorCode)) {
241            return FALSE;
242        }
243        const UChar *sArray=s.getBuffer();
244        if(sArray==NULL) {
245            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
246            return FALSE;
247        }
248        UnicodeString temp;
249        ReorderingBuffer buffer(impl, temp);
250        if(!buffer.init(5, errorCode)) {  // small destCapacity for substring normalization
251            return FALSE;
252        }
253        return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
254    }
255    virtual UNormalizationCheckResult
256    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
257        if(U_FAILURE(errorCode)) {
258            return UNORM_MAYBE;
259        }
260        const UChar *sArray=s.getBuffer();
261        if(sArray==NULL) {
262            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
263            return UNORM_MAYBE;
264        }
265        UNormalizationCheckResult qcResult=UNORM_YES;
266        impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
267        return qcResult;
268    }
269    virtual const UChar *
270    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
271        return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
272    }
273    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
274    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
275        return impl.getCompQuickCheck(impl.getNorm16(c));
276    }
277    virtual UBool hasBoundaryBefore(UChar32 c) const {
278        return impl.hasCompBoundaryBefore(c);
279    }
280    virtual UBool hasBoundaryAfter(UChar32 c) const {
281        return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
282    }
283    virtual UBool isInert(UChar32 c) const {
284        return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
285    }
286
287    const UBool onlyContiguous;
288};
289
290class FCDNormalizer2 : public Normalizer2WithImpl {
291public:
292    FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
293    virtual ~FCDNormalizer2();
294
295private:
296    virtual void
297    normalize(const UChar *src, const UChar *limit,
298              ReorderingBuffer &buffer, UErrorCode &errorCode) const {
299        impl.makeFCD(src, limit, &buffer, errorCode);
300    }
301    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
302    virtual void
303    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
304                       UnicodeString &safeMiddle,
305                       ReorderingBuffer &buffer, UErrorCode &errorCode) const {
306        impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
307    }
308    virtual const UChar *
309    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
310        return impl.makeFCD(src, limit, NULL, errorCode);
311    }
312    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
313    virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
314    virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
315    virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
316};
317
318struct Norm2AllModes : public UMemory {
319    Norm2AllModes(Normalizer2Impl *i)
320            : impl(i), comp(*i, FALSE), decomp(*i), fcd(*i), fcc(*i, TRUE) {}
321    ~Norm2AllModes();
322
323    static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode);
324    static Norm2AllModes *createNFCInstance(UErrorCode &errorCode);
325    static Norm2AllModes *createInstance(const char *packageName,
326                                         const char *name,
327                                         UErrorCode &errorCode);
328
329    static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
330    static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
331    static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
332
333    Normalizer2Impl *impl;
334    ComposeNormalizer2 comp;
335    DecomposeNormalizer2 decomp;
336    FCDNormalizer2 fcd;
337    ComposeNormalizer2 fcc;
338};
339
340U_NAMESPACE_END
341
342#endif  // !UCONFIG_NO_NORMALIZATION
343#endif  // __NORM2ALLMODES_H__
344