1// Copyright (C) 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/* 4******************************************************************************* 5* Copyright (C) 2014, International Business Machines 6* Corporation and others. All Rights Reserved. 7******************************************************************************* 8* loadednormalizer2impl.h 9* 10* created on: 2014sep07 11* created by: Markus W. Scherer 12*/ 13 14#ifndef __NORM2ALLMODES_H__ 15#define __NORM2ALLMODES_H__ 16 17#include "unicode/utypes.h" 18 19#if !UCONFIG_NO_NORMALIZATION 20 21#include "unicode/normalizer2.h" 22#include "unicode/unistr.h" 23#include "cpputils.h" 24#include "normalizer2impl.h" 25 26U_NAMESPACE_BEGIN 27 28// Intermediate class: 29// Has Normalizer2Impl and does boilerplate argument checking and setup. 30class Normalizer2WithImpl : public Normalizer2 { 31public: 32 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} 33 virtual ~Normalizer2WithImpl(); 34 35 // normalize 36 virtual UnicodeString & 37 normalize(const UnicodeString &src, 38 UnicodeString &dest, 39 UErrorCode &errorCode) const { 40 if(U_FAILURE(errorCode)) { 41 dest.setToBogus(); 42 return dest; 43 } 44 const UChar *sArray=src.getBuffer(); 45 if(&dest==&src || sArray==NULL) { 46 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 47 dest.setToBogus(); 48 return dest; 49 } 50 dest.remove(); 51 ReorderingBuffer buffer(impl, dest); 52 if(buffer.init(src.length(), errorCode)) { 53 normalize(sArray, sArray+src.length(), buffer, errorCode); 54 } 55 return dest; 56 } 57 virtual void 58 normalize(const UChar *src, const UChar *limit, 59 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; 60 61 // normalize and append 62 virtual UnicodeString & 63 normalizeSecondAndAppend(UnicodeString &first, 64 const UnicodeString &second, 65 UErrorCode &errorCode) const { 66 return normalizeSecondAndAppend(first, second, TRUE, errorCode); 67 } 68 virtual UnicodeString & 69 append(UnicodeString &first, 70 const UnicodeString &second, 71 UErrorCode &errorCode) const { 72 return normalizeSecondAndAppend(first, second, FALSE, errorCode); 73 } 74 UnicodeString & 75 normalizeSecondAndAppend(UnicodeString &first, 76 const UnicodeString &second, 77 UBool doNormalize, 78 UErrorCode &errorCode) const { 79 uprv_checkCanGetBuffer(first, errorCode); 80 if(U_FAILURE(errorCode)) { 81 return first; 82 } 83 const UChar *secondArray=second.getBuffer(); 84 if(&first==&second || secondArray==NULL) { 85 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 86 return first; 87 } 88 int32_t firstLength=first.length(); 89 UnicodeString safeMiddle; 90 { 91 ReorderingBuffer buffer(impl, first); 92 if(buffer.init(firstLength+second.length(), errorCode)) { 93 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, 94 safeMiddle, buffer, errorCode); 95 } 96 } // The ReorderingBuffer destructor finalizes the first string. 97 if(U_FAILURE(errorCode)) { 98 // Restore the modified suffix of the first string. 99 first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle); 100 } 101 return first; 102 } 103 virtual void 104 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 105 UnicodeString &safeMiddle, 106 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; 107 virtual UBool 108 getDecomposition(UChar32 c, UnicodeString &decomposition) const { 109 UChar buffer[4]; 110 int32_t length; 111 const UChar *d=impl.getDecomposition(c, buffer, length); 112 if(d==NULL) { 113 return FALSE; 114 } 115 if(d==buffer) { 116 decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c) 117 } else { 118 decomposition.setTo(FALSE, d, length); // read-only alias 119 } 120 return TRUE; 121 } 122 virtual UBool 123 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const { 124 UChar buffer[30]; 125 int32_t length; 126 const UChar *d=impl.getRawDecomposition(c, buffer, length); 127 if(d==NULL) { 128 return FALSE; 129 } 130 if(d==buffer) { 131 decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition) 132 } else { 133 decomposition.setTo(FALSE, d, length); // read-only alias 134 } 135 return TRUE; 136 } 137 virtual UChar32 138 composePair(UChar32 a, UChar32 b) const { 139 return impl.composePair(a, b); 140 } 141 142 virtual uint8_t 143 getCombiningClass(UChar32 c) const { 144 return impl.getCC(impl.getNorm16(c)); 145 } 146 147 // quick checks 148 virtual UBool 149 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { 150 if(U_FAILURE(errorCode)) { 151 return FALSE; 152 } 153 const UChar *sArray=s.getBuffer(); 154 if(sArray==NULL) { 155 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 156 return FALSE; 157 } 158 const UChar *sLimit=sArray+s.length(); 159 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); 160 } 161 virtual UNormalizationCheckResult 162 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { 163 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; 164 } 165 virtual int32_t 166 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { 167 if(U_FAILURE(errorCode)) { 168 return 0; 169 } 170 const UChar *sArray=s.getBuffer(); 171 if(sArray==NULL) { 172 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 173 return 0; 174 } 175 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); 176 } 177 virtual const UChar * 178 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0; 179 180 virtual UNormalizationCheckResult getQuickCheck(UChar32) const { 181 return UNORM_YES; 182 } 183 184 const Normalizer2Impl &impl; 185}; 186 187class DecomposeNormalizer2 : public Normalizer2WithImpl { 188public: 189 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} 190 virtual ~DecomposeNormalizer2(); 191 192private: 193 virtual void 194 normalize(const UChar *src, const UChar *limit, 195 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 196 impl.decompose(src, limit, &buffer, errorCode); 197 } 198 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 199 virtual void 200 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 201 UnicodeString &safeMiddle, 202 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 203 impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); 204 } 205 virtual const UChar * 206 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { 207 return impl.decompose(src, limit, NULL, errorCode); 208 } 209 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 210 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { 211 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; 212 } 213 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); } 214 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); } 215 virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } 216}; 217 218class ComposeNormalizer2 : public Normalizer2WithImpl { 219public: 220 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : 221 Normalizer2WithImpl(ni), onlyContiguous(fcc) {} 222 virtual ~ComposeNormalizer2(); 223 224private: 225 virtual void 226 normalize(const UChar *src, const UChar *limit, 227 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 228 impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); 229 } 230 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 231 virtual void 232 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 233 UnicodeString &safeMiddle, 234 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 235 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode); 236 } 237 238 virtual UBool 239 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { 240 if(U_FAILURE(errorCode)) { 241 return FALSE; 242 } 243 const UChar *sArray=s.getBuffer(); 244 if(sArray==NULL) { 245 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 246 return FALSE; 247 } 248 UnicodeString temp; 249 ReorderingBuffer buffer(impl, temp); 250 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization 251 return FALSE; 252 } 253 return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode); 254 } 255 virtual UNormalizationCheckResult 256 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { 257 if(U_FAILURE(errorCode)) { 258 return UNORM_MAYBE; 259 } 260 const UChar *sArray=s.getBuffer(); 261 if(sArray==NULL) { 262 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 263 return UNORM_MAYBE; 264 } 265 UNormalizationCheckResult qcResult=UNORM_YES; 266 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult); 267 return qcResult; 268 } 269 virtual const UChar * 270 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const { 271 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); 272 } 273 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 274 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { 275 return impl.getCompQuickCheck(impl.getNorm16(c)); 276 } 277 virtual UBool hasBoundaryBefore(UChar32 c) const { 278 return impl.hasCompBoundaryBefore(c); 279 } 280 virtual UBool hasBoundaryAfter(UChar32 c) const { 281 return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE); 282 } 283 virtual UBool isInert(UChar32 c) const { 284 return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE); 285 } 286 287 const UBool onlyContiguous; 288}; 289 290class FCDNormalizer2 : public Normalizer2WithImpl { 291public: 292 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} 293 virtual ~FCDNormalizer2(); 294 295private: 296 virtual void 297 normalize(const UChar *src, const UChar *limit, 298 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 299 impl.makeFCD(src, limit, &buffer, errorCode); 300 } 301 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 302 virtual void 303 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 304 UnicodeString &safeMiddle, 305 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 306 impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); 307 } 308 virtual const UChar * 309 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { 310 return impl.makeFCD(src, limit, NULL, errorCode); 311 } 312 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. 313 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); } 314 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); } 315 virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } 316}; 317 318struct Norm2AllModes : public UMemory { 319 Norm2AllModes(Normalizer2Impl *i) 320 : impl(i), comp(*i, FALSE), decomp(*i), fcd(*i), fcc(*i, TRUE) {} 321 ~Norm2AllModes(); 322 323 static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode); 324 static Norm2AllModes *createNFCInstance(UErrorCode &errorCode); 325 static Norm2AllModes *createInstance(const char *packageName, 326 const char *name, 327 UErrorCode &errorCode); 328 329 static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode); 330 static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode); 331 static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode); 332 333 Normalizer2Impl *impl; 334 ComposeNormalizer2 comp; 335 DecomposeNormalizer2 decomp; 336 FCDNormalizer2 fcd; 337 ComposeNormalizer2 fcc; 338}; 339 340U_NAMESPACE_END 341 342#endif // !UCONFIG_NO_NORMALIZATION 343#endif // __NORM2ALLMODES_H__ 344