tstnorm.cpp revision 50294ead5e5d23f5bbfed76e00e6b510bd41eee1
1/******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2010, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 7#include "unicode/utypes.h" 8 9#if !UCONFIG_NO_NORMALIZATION 10 11#include "unicode/uchar.h" 12#include "unicode/errorcode.h" 13#include "unicode/normlzr.h" 14#include "unicode/uniset.h" 15#include "unicode/usetiter.h" 16#include "unicode/schriter.h" 17#include "cstring.h" 18#include "unormimp.h" 19#include "tstnorm.h" 20 21#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0]))) 22#define ARRAY_LENGTH(array) LENGTHOF(array) 23 24#define CASE(id,test) case id: \ 25 name = #test; \ 26 if (exec) { \ 27 logln(#test "---"); \ 28 logln((UnicodeString)""); \ 29 test(); \ 30 } \ 31 break 32 33static UErrorCode status = U_ZERO_ERROR; 34 35void BasicNormalizerTest::runIndexedTest(int32_t index, UBool exec, 36 const char* &name, char* /*par*/) { 37 switch (index) { 38 CASE(0,TestDecomp); 39 CASE(1,TestCompatDecomp); 40 CASE(2,TestCanonCompose); 41 CASE(3,TestCompatCompose); 42 CASE(4,TestPrevious); 43 CASE(5,TestHangulDecomp); 44 CASE(6,TestHangulCompose); 45 CASE(7,TestTibetan); 46 CASE(8,TestCompositionExclusion); 47 CASE(9,TestZeroIndex); 48 CASE(10,TestVerisign); 49 CASE(11,TestPreviousNext); 50 CASE(12,TestNormalizerAPI); 51 CASE(13,TestConcatenate); 52 CASE(14,FindFoldFCDExceptions); 53 CASE(15,TestCompare); 54 CASE(16,TestSkippable); 55 CASE(17,TestCustomComp); 56 CASE(18,TestCustomFCC); 57 default: name = ""; break; 58 } 59} 60 61/** 62 * Convert Java-style strings with \u Unicode escapes into UnicodeString objects 63 */ 64static UnicodeString str(const char *input) 65{ 66 UnicodeString str(input, ""); // Invariant conversion 67 return str.unescape(); 68} 69 70 71BasicNormalizerTest::BasicNormalizerTest() 72{ 73 // canonTest 74 // Input Decomposed Composed 75 76 canonTests[0][0] = str("cat"); canonTests[0][1] = str("cat"); canonTests[0][2] = str("cat"); 77 78 canonTests[1][0] = str("\\u00e0ardvark"); canonTests[1][1] = str("a\\u0300ardvark"); canonTests[1][2] = str("\\u00e0ardvark"); 79 80 canonTests[2][0] = str("\\u1e0a"); canonTests[2][1] = str("D\\u0307"); canonTests[2][2] = str("\\u1e0a"); // D-dot_above 81 82 canonTests[3][0] = str("D\\u0307"); canonTests[3][1] = str("D\\u0307"); canonTests[3][2] = str("\\u1e0a"); // D dot_above 83 84 canonTests[4][0] = str("\\u1e0c\\u0307"); canonTests[4][1] = str("D\\u0323\\u0307"); canonTests[4][2] = str("\\u1e0c\\u0307"); // D-dot_below dot_above 85 86 canonTests[5][0] = str("\\u1e0a\\u0323"); canonTests[5][1] = str("D\\u0323\\u0307"); canonTests[5][2] = str("\\u1e0c\\u0307"); // D-dot_above dot_below 87 88 canonTests[6][0] = str("D\\u0307\\u0323"); canonTests[6][1] = str("D\\u0323\\u0307"); canonTests[6][2] = str("\\u1e0c\\u0307"); // D dot_below dot_above 89 90 canonTests[7][0] = str("\\u1e10\\u0307\\u0323"); canonTests[7][1] = str("D\\u0327\\u0323\\u0307"); canonTests[7][2] = str("\\u1e10\\u0323\\u0307"); // D dot_below cedilla dot_above 91 92 canonTests[8][0] = str("D\\u0307\\u0328\\u0323"); canonTests[8][1] = str("D\\u0328\\u0323\\u0307"); canonTests[8][2] = str("\\u1e0c\\u0328\\u0307"); // D dot_above ogonek dot_below 93 94 canonTests[9][0] = str("\\u1E14"); canonTests[9][1] = str("E\\u0304\\u0300"); canonTests[9][2] = str("\\u1E14"); // E-macron-grave 95 96 canonTests[10][0] = str("\\u0112\\u0300"); canonTests[10][1] = str("E\\u0304\\u0300"); canonTests[10][2] = str("\\u1E14"); // E-macron + grave 97 98 canonTests[11][0] = str("\\u00c8\\u0304"); canonTests[11][1] = str("E\\u0300\\u0304"); canonTests[11][2] = str("\\u00c8\\u0304"); // E-grave + macron 99 100 canonTests[12][0] = str("\\u212b"); canonTests[12][1] = str("A\\u030a"); canonTests[12][2] = str("\\u00c5"); // angstrom_sign 101 102 canonTests[13][0] = str("\\u00c5"); canonTests[13][1] = str("A\\u030a"); canonTests[13][2] = str("\\u00c5"); // A-ring 103 104 canonTests[14][0] = str("\\u00C4ffin"); canonTests[14][1] = str("A\\u0308ffin"); canonTests[14][2] = str("\\u00C4ffin"); 105 106 canonTests[15][0] = str("\\u00C4\\uFB03n"); canonTests[15][1] = str("A\\u0308\\uFB03n"); canonTests[15][2] = str("\\u00C4\\uFB03n"); 107 108 canonTests[16][0] = str("Henry IV"); canonTests[16][1] = str("Henry IV"); canonTests[16][2] = str("Henry IV"); 109 110 canonTests[17][0] = str("Henry \\u2163"); canonTests[17][1] = str("Henry \\u2163"); canonTests[17][2] = str("Henry \\u2163"); 111 112 canonTests[18][0] = str("\\u30AC"); canonTests[18][1] = str("\\u30AB\\u3099"); canonTests[18][2] = str("\\u30AC"); // ga (Katakana) 113 114 canonTests[19][0] = str("\\u30AB\\u3099"); canonTests[19][1] = str("\\u30AB\\u3099"); canonTests[19][2] = str("\\u30AC"); // ka + ten 115 116 canonTests[20][0] = str("\\uFF76\\uFF9E"); canonTests[20][1] = str("\\uFF76\\uFF9E"); canonTests[20][2] = str("\\uFF76\\uFF9E"); // hw_ka + hw_ten 117 118 canonTests[21][0] = str("\\u30AB\\uFF9E"); canonTests[21][1] = str("\\u30AB\\uFF9E"); canonTests[21][2] = str("\\u30AB\\uFF9E"); // ka + hw_ten 119 120 canonTests[22][0] = str("\\uFF76\\u3099"); canonTests[22][1] = str("\\uFF76\\u3099"); canonTests[22][2] = str("\\uFF76\\u3099"); // hw_ka + ten 121 122 canonTests[23][0] = str("A\\u0300\\u0316"); canonTests[23][1] = str("A\\u0316\\u0300"); canonTests[23][2] = str("\\u00C0\\u0316"); 123 124 /* compatTest */ 125 // Input Decomposed Composed 126 compatTests[0][0] = str("cat"); compatTests[0][1] = str("cat"); compatTests[0][2] = str("cat") ; 127 128 compatTests[1][0] = str("\\uFB4f"); compatTests[1][1] = str("\\u05D0\\u05DC"); compatTests[1][2] = str("\\u05D0\\u05DC"); // Alef-Lamed vs. Alef, Lamed 129 130 compatTests[2][0] = str("\\u00C4ffin"); compatTests[2][1] = str("A\\u0308ffin"); compatTests[2][2] = str("\\u00C4ffin") ; 131 132 compatTests[3][0] = str("\\u00C4\\uFB03n"); compatTests[3][1] = str("A\\u0308ffin"); compatTests[3][2] = str("\\u00C4ffin") ; // ffi ligature -> f + f + i 133 134 compatTests[4][0] = str("Henry IV"); compatTests[4][1] = str("Henry IV"); compatTests[4][2] = str("Henry IV") ; 135 136 compatTests[5][0] = str("Henry \\u2163"); compatTests[5][1] = str("Henry IV"); compatTests[5][2] = str("Henry IV") ; 137 138 compatTests[6][0] = str("\\u30AC"); compatTests[6][1] = str("\\u30AB\\u3099"); compatTests[6][2] = str("\\u30AC") ; // ga (Katakana) 139 140 compatTests[7][0] = str("\\u30AB\\u3099"); compatTests[7][1] = str("\\u30AB\\u3099"); compatTests[7][2] = str("\\u30AC") ; // ka + ten 141 142 compatTests[8][0] = str("\\uFF76\\u3099"); compatTests[8][1] = str("\\u30AB\\u3099"); compatTests[8][2] = str("\\u30AC") ; // hw_ka + ten 143 144 /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later */ 145 compatTests[9][0] = str("\\uFF76\\uFF9E"); compatTests[9][1] = str("\\u30AB\\u3099"); compatTests[9][2] = str("\\u30AC") ; // hw_ka + hw_ten 146 147 compatTests[10][0] = str("\\u30AB\\uFF9E"); compatTests[10][1] = str("\\u30AB\\u3099"); compatTests[10][2] = str("\\u30AC") ; // ka + hw_ten 148 149 /* Hangul Canonical */ 150 // Input Decomposed Composed 151 hangulCanon[0][0] = str("\\ud4db"); hangulCanon[0][1] = str("\\u1111\\u1171\\u11b6"); hangulCanon[0][2] = str("\\ud4db") ; 152 153 hangulCanon[1][0] = str("\\u1111\\u1171\\u11b6"), hangulCanon[1][1] = str("\\u1111\\u1171\\u11b6"), hangulCanon[1][2] = str("\\ud4db"); 154} 155 156BasicNormalizerTest::~BasicNormalizerTest() 157{ 158} 159 160void BasicNormalizerTest::TestPrevious() 161{ 162 Normalizer* norm = new Normalizer("", UNORM_NFD); 163 164 logln("testing decomp..."); 165 uint32_t i; 166 for (i = 0; i < ARRAY_LENGTH(canonTests); i++) { 167 backAndForth(norm, canonTests[i][0]); 168 } 169 170 logln("testing compose..."); 171 norm->setMode(UNORM_NFC); 172 for (i = 0; i < ARRAY_LENGTH(canonTests); i++) { 173 backAndForth(norm, canonTests[i][0]); 174 } 175 176 delete norm; 177} 178 179void BasicNormalizerTest::TestDecomp() 180{ 181 Normalizer* norm = new Normalizer("", UNORM_NFD); 182 iterateTest(norm, canonTests, ARRAY_LENGTH(canonTests), 1); 183 staticTest(UNORM_NFD, 0, canonTests, ARRAY_LENGTH(canonTests), 1); 184 delete norm; 185} 186 187void BasicNormalizerTest::TestCompatDecomp() 188{ 189 Normalizer* norm = new Normalizer("", UNORM_NFKD); 190 iterateTest(norm, compatTests, ARRAY_LENGTH(compatTests), 1); 191 192 staticTest(UNORM_NFKD, 0, 193 compatTests, ARRAY_LENGTH(compatTests), 1); 194 delete norm; 195} 196 197void BasicNormalizerTest::TestCanonCompose() 198{ 199 Normalizer* norm = new Normalizer("", UNORM_NFC); 200 iterateTest(norm, canonTests, ARRAY_LENGTH(canonTests), 2); 201 202 staticTest(UNORM_NFC, 0, canonTests, 203 ARRAY_LENGTH(canonTests), 2); 204 delete norm; 205} 206 207void BasicNormalizerTest::TestCompatCompose() 208{ 209 Normalizer* norm = new Normalizer("", UNORM_NFKC); 210 iterateTest(norm, compatTests, ARRAY_LENGTH(compatTests), 2); 211 212 staticTest(UNORM_NFKC, 0, 213 compatTests, ARRAY_LENGTH(compatTests), 2); 214 delete norm; 215} 216 217 218//------------------------------------------------------------------------------- 219 220void BasicNormalizerTest::TestHangulCompose() 221{ 222 // Make sure that the static composition methods work 223 logln("Canonical composition..."); 224 staticTest(UNORM_NFC, 0, hangulCanon, ARRAY_LENGTH(hangulCanon), 2); 225 logln("Compatibility composition..."); 226 227 // Now try iterative composition.... 228 logln("Static composition..."); 229 Normalizer* norm = new Normalizer("", UNORM_NFC); 230 iterateTest(norm, hangulCanon, ARRAY_LENGTH(hangulCanon), 2); 231 norm->setMode(UNORM_NFKC); 232 233 // And finally, make sure you can do it in reverse too 234 logln("Reverse iteration..."); 235 norm->setMode(UNORM_NFC); 236 for (uint32_t i = 0; i < ARRAY_LENGTH(hangulCanon); i++) { 237 backAndForth(norm, hangulCanon[i][0]); 238 } 239 delete norm; 240} 241 242void BasicNormalizerTest::TestHangulDecomp() 243{ 244 // Make sure that the static decomposition methods work 245 logln("Canonical decomposition..."); 246 staticTest(UNORM_NFD, 0, hangulCanon, ARRAY_LENGTH(hangulCanon), 1); 247 logln("Compatibility decomposition..."); 248 249 // Now the iterative decomposition methods... 250 logln("Iterative decomposition..."); 251 Normalizer* norm = new Normalizer("", UNORM_NFD); 252 iterateTest(norm, hangulCanon, ARRAY_LENGTH(hangulCanon), 1); 253 norm->setMode(UNORM_NFKD); 254 255 // And finally, make sure you can do it in reverse too 256 logln("Reverse iteration..."); 257 norm->setMode(UNORM_NFD); 258 for (uint32_t i = 0; i < ARRAY_LENGTH(hangulCanon); i++) { 259 backAndForth(norm, hangulCanon[i][0]); 260 } 261 delete norm; 262} 263 264/** 265 * The Tibetan vowel sign AA, 0f71, was messed up prior to Unicode version 2.1.9. 266 */ 267void BasicNormalizerTest::TestTibetan(void) { 268 UnicodeString decomp[1][3]; 269 decomp[0][0] = str("\\u0f77"); 270 decomp[0][1] = str("\\u0f77"); 271 decomp[0][2] = str("\\u0fb2\\u0f71\\u0f80"); 272 273 UnicodeString compose[1][3]; 274 compose[0][0] = str("\\u0fb2\\u0f71\\u0f80"); 275 compose[0][1] = str("\\u0fb2\\u0f71\\u0f80"); 276 compose[0][2] = str("\\u0fb2\\u0f71\\u0f80"); 277 278 staticTest(UNORM_NFD, 0, decomp, ARRAY_LENGTH(decomp), 1); 279 staticTest(UNORM_NFKD, 0, decomp, ARRAY_LENGTH(decomp), 2); 280 staticTest(UNORM_NFC, 0, compose, ARRAY_LENGTH(compose), 1); 281 staticTest(UNORM_NFKC, 0, compose, ARRAY_LENGTH(compose), 2); 282} 283 284/** 285 * Make sure characters in the CompositionExclusion.txt list do not get 286 * composed to. 287 */ 288void BasicNormalizerTest::TestCompositionExclusion(void) { 289 // This list is generated from CompositionExclusion.txt. 290 // Update whenever the normalizer tables are updated. Note 291 // that we test all characters listed, even those that can be 292 // derived from the Unicode DB and are therefore commented 293 // out. 294 // ### TODO read composition exclusion from source/data/unidata file 295 // and test against that 296 UnicodeString EXCLUDED = str( 297 "\\u0340\\u0341\\u0343\\u0344\\u0374\\u037E\\u0387\\u0958" 298 "\\u0959\\u095A\\u095B\\u095C\\u095D\\u095E\\u095F\\u09DC" 299 "\\u09DD\\u09DF\\u0A33\\u0A36\\u0A59\\u0A5A\\u0A5B\\u0A5E" 300 "\\u0B5C\\u0B5D\\u0F43\\u0F4D\\u0F52\\u0F57\\u0F5C\\u0F69" 301 "\\u0F73\\u0F75\\u0F76\\u0F78\\u0F81\\u0F93\\u0F9D\\u0FA2" 302 "\\u0FA7\\u0FAC\\u0FB9\\u1F71\\u1F73\\u1F75\\u1F77\\u1F79" 303 "\\u1F7B\\u1F7D\\u1FBB\\u1FBE\\u1FC9\\u1FCB\\u1FD3\\u1FDB" 304 "\\u1FE3\\u1FEB\\u1FEE\\u1FEF\\u1FF9\\u1FFB\\u1FFD\\u2000" 305 "\\u2001\\u2126\\u212A\\u212B\\u2329\\u232A\\uF900\\uFA10" 306 "\\uFA12\\uFA15\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A\\uFB1F" 307 "\\uFB2A\\uFB2B\\uFB2C\\uFB2D\\uFB2E\\uFB2F\\uFB30\\uFB31" 308 "\\uFB32\\uFB33\\uFB34\\uFB35\\uFB36\\uFB38\\uFB39\\uFB3A" 309 "\\uFB3B\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46" 310 "\\uFB47\\uFB48\\uFB49\\uFB4A\\uFB4B\\uFB4C\\uFB4D\\uFB4E" 311 ); 312 for (int32_t i=0; i<EXCLUDED.length(); ++i) { 313 UnicodeString a(EXCLUDED.charAt(i)); 314 UnicodeString b; 315 UnicodeString c; 316 Normalizer::normalize(a, UNORM_NFKD, 0, b, status); 317 Normalizer::normalize(b, UNORM_NFC, 0, c, status); 318 if (c == a) { 319 errln("FAIL: " + hex(a) + " x DECOMP_COMPAT => " + 320 hex(b) + " x COMPOSE => " + 321 hex(c)); 322 } else if (verbose) { 323 logln("Ok: " + hex(a) + " x DECOMP_COMPAT => " + 324 hex(b) + " x COMPOSE => " + 325 hex(c)); 326 } 327 } 328} 329 330/** 331 * Test for a problem that showed up just before ICU 1.6 release 332 * having to do with combining characters with an index of zero. 333 * Such characters do not participate in any canonical 334 * decompositions. However, having an index of zero means that 335 * they all share one typeMask[] entry, that is, they all have to 336 * map to the same canonical class, which is not the case, in 337 * reality. 338 */ 339void BasicNormalizerTest::TestZeroIndex(void) { 340 const char* DATA[] = { 341 // Expect col1 x COMPOSE_COMPAT => col2 342 // Expect col2 x DECOMP => col3 343 "A\\u0316\\u0300", "\\u00C0\\u0316", "A\\u0316\\u0300", 344 "A\\u0300\\u0316", "\\u00C0\\u0316", "A\\u0316\\u0300", 345 "A\\u0327\\u0300", "\\u00C0\\u0327", "A\\u0327\\u0300", 346 "c\\u0321\\u0327", "c\\u0321\\u0327", "c\\u0321\\u0327", 347 "c\\u0327\\u0321", "\\u00E7\\u0321", "c\\u0327\\u0321", 348 }; 349 int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0])); 350 351 for (int32_t i=0; i<DATA_length; i+=3) { 352 UErrorCode status = U_ZERO_ERROR; 353 UnicodeString a(DATA[i], ""); 354 a = a.unescape(); 355 UnicodeString b; 356 Normalizer::normalize(a, UNORM_NFKC, 0, b, status); 357 if (U_FAILURE(status)) { 358 dataerrln("Error calling normalize UNORM_NFKC: %s", u_errorName(status)); 359 } else { 360 UnicodeString exp(DATA[i+1], ""); 361 exp = exp.unescape(); 362 if (b == exp) { 363 logln((UnicodeString)"Ok: " + hex(a) + " x COMPOSE_COMPAT => " + hex(b)); 364 } else { 365 errln((UnicodeString)"FAIL: " + hex(a) + " x COMPOSE_COMPAT => " + hex(b) + 366 ", expect " + hex(exp)); 367 } 368 } 369 Normalizer::normalize(b, UNORM_NFD, 0, a, status); 370 if (U_FAILURE(status)) { 371 dataerrln("Error calling normalize UNORM_NFD: %s", u_errorName(status)); 372 } else { 373 UnicodeString exp = UnicodeString(DATA[i+2], "").unescape(); 374 if (a == exp) { 375 logln((UnicodeString)"Ok: " + hex(b) + " x DECOMP => " + hex(a)); 376 } else { 377 errln((UnicodeString)"FAIL: " + hex(b) + " x DECOMP => " + hex(a) + 378 ", expect " + hex(exp)); 379 } 380 } 381 } 382} 383 384/** 385 * Run a few specific cases that are failing for Verisign. 386 */ 387void BasicNormalizerTest::TestVerisign(void) { 388 /* 389 > Their input: 390 > 05B8 05B9 05B1 0591 05C3 05B0 05AC 059F 391 > Their output (supposedly from ICU): 392 > 05B8 05B1 05B9 0591 05C3 05B0 05AC 059F 393 > My output from charlint: 394 > 05B1 05B8 05B9 0591 05C3 05B0 05AC 059F 395 396 05B8 05B9 05B1 0591 05C3 05B0 05AC 059F => 05B1 05B8 05B9 0591 05C3 05B0 397 05AC 059F 398 399 U+05B8 18 E HEBREW POINT QAMATS 400 U+05B9 19 F HEBREW POINT HOLAM 401 U+05B1 11 HEBREW POINT HATAF SEGOL 402 U+0591 220 HEBREW ACCENT ETNAHTA 403 U+05C3 0 HEBREW PUNCTUATION SOF PASUQ 404 U+05B0 10 HEBREW POINT SHEVA 405 U+05AC 230 HEBREW ACCENT ILUY 406 U+059F 230 HEBREW ACCENT QARNEY PARA 407 408 U+05B1 11 HEBREW POINT HATAF SEGOL 409 U+05B8 18 HEBREW POINT QAMATS 410 U+05B9 19 HEBREW POINT HOLAM 411 U+0591 220 HEBREW ACCENT ETNAHTA 412 U+05C3 0 HEBREW PUNCTUATION SOF PASUQ 413 U+05B0 10 HEBREW POINT SHEVA 414 U+05AC 230 HEBREW ACCENT ILUY 415 U+059F 230 HEBREW ACCENT QARNEY PARA 416 417 Wrong result: 418 U+05B8 18 HEBREW POINT QAMATS 419 U+05B1 11 HEBREW POINT HATAF SEGOL 420 U+05B9 19 HEBREW POINT HOLAM 421 U+0591 220 HEBREW ACCENT ETNAHTA 422 U+05C3 0 HEBREW PUNCTUATION SOF PASUQ 423 U+05B0 10 HEBREW POINT SHEVA 424 U+05AC 230 HEBREW ACCENT ILUY 425 U+059F 230 HEBREW ACCENT QARNEY PARA 426 427 428 > Their input: 429 >0592 05B7 05BC 05A5 05B0 05C0 05C4 05AD 430 >Their output (supposedly from ICU): 431 >0592 05B0 05B7 05BC 05A5 05C0 05AD 05C4 432 >My output from charlint: 433 >05B0 05B7 05BC 05A5 0592 05C0 05AD 05C4 434 435 0592 05B7 05BC 05A5 05B0 05C0 05C4 05AD => 05B0 05B7 05BC 05A5 0592 05C0 436 05AD 05C4 437 438 U+0592 230 HEBREW ACCENT SEGOL 439 U+05B7 17 HEBREW POINT PATAH 440 U+05BC 21 HEBREW POINT DAGESH OR MAPIQ 441 U+05A5 220 HEBREW ACCENT MERKHA 442 U+05B0 10 HEBREW POINT SHEVA 443 U+05C0 0 HEBREW PUNCTUATION PASEQ 444 U+05C4 230 HEBREW MARK UPPER DOT 445 U+05AD 222 HEBREW ACCENT DEHI 446 447 U+05B0 10 HEBREW POINT SHEVA 448 U+05B7 17 HEBREW POINT PATAH 449 U+05BC 21 HEBREW POINT DAGESH OR MAPIQ 450 U+05A5 220 HEBREW ACCENT MERKHA 451 U+0592 230 HEBREW ACCENT SEGOL 452 U+05C0 0 HEBREW PUNCTUATION PASEQ 453 U+05AD 222 HEBREW ACCENT DEHI 454 U+05C4 230 HEBREW MARK UPPER DOT 455 456 Wrong result: 457 U+0592 230 HEBREW ACCENT SEGOL 458 U+05B0 10 HEBREW POINT SHEVA 459 U+05B7 17 HEBREW POINT PATAH 460 U+05BC 21 HEBREW POINT DAGESH OR MAPIQ 461 U+05A5 220 HEBREW ACCENT MERKHA 462 U+05C0 0 HEBREW PUNCTUATION PASEQ 463 U+05AD 222 HEBREW ACCENT DEHI 464 U+05C4 230 HEBREW MARK UPPER DOT 465 */ 466 UnicodeString data[2][3]; 467 data[0][0] = str("\\u05B8\\u05B9\\u05B1\\u0591\\u05C3\\u05B0\\u05AC\\u059F"); 468 data[0][1] = str("\\u05B1\\u05B8\\u05B9\\u0591\\u05C3\\u05B0\\u05AC\\u059F"); 469 data[0][2] = str(""); 470 data[1][0] = str("\\u0592\\u05B7\\u05BC\\u05A5\\u05B0\\u05C0\\u05C4\\u05AD"); 471 data[1][1] = str("\\u05B0\\u05B7\\u05BC\\u05A5\\u0592\\u05C0\\u05AD\\u05C4"); 472 data[1][2] = str(""); 473 474 staticTest(UNORM_NFD, 0, data, ARRAY_LENGTH(data), 1); 475 staticTest(UNORM_NFC, 0, data, ARRAY_LENGTH(data), 1); 476} 477 478//------------------------------------------------------------------------ 479// Internal utilities 480// 481 482UnicodeString BasicNormalizerTest::hex(UChar ch) { 483 UnicodeString result; 484 return appendHex(ch, 4, result); 485} 486 487UnicodeString BasicNormalizerTest::hex(const UnicodeString& s) { 488 UnicodeString result; 489 for (int i = 0; i < s.length(); ++i) { 490 if (i != 0) result += (UChar)0x2c/*,*/; 491 appendHex(s[i], 4, result); 492 } 493 return result; 494} 495 496 497inline static void insert(UnicodeString& dest, int pos, UChar32 ch) 498{ 499 dest.replace(pos, 0, ch); 500} 501 502void BasicNormalizerTest::backAndForth(Normalizer* iter, const UnicodeString& input) 503{ 504 UChar32 ch; 505 iter->setText(input, status); 506 507 // Run through the iterator forwards and stick it into a StringBuffer 508 UnicodeString forward; 509 for (ch = iter->first(); ch != iter->DONE; ch = iter->next()) { 510 forward += ch; 511 } 512 513 // Now do it backwards 514 UnicodeString reverse; 515 for (ch = iter->last(); ch != iter->DONE; ch = iter->previous()) { 516 insert(reverse, 0, ch); 517 } 518 519 if (forward != reverse) { 520 errln("Forward/reverse mismatch for input " + hex(input) 521 + ", forward: " + hex(forward) + ", backward: " + hex(reverse)); 522 } 523} 524 525void BasicNormalizerTest::staticTest(UNormalizationMode mode, int options, 526 UnicodeString tests[][3], int length, 527 int outCol) 528{ 529 for (int i = 0; i < length; i++) 530 { 531 UnicodeString& input = tests[i][0]; 532 UnicodeString& expect = tests[i][outCol]; 533 534 logln("Normalizing '" + input + "' (" + hex(input) + ")" ); 535 536 UnicodeString output; 537 Normalizer::normalize(input, mode, options, output, status); 538 539 if (output != expect) { 540 dataerrln(UnicodeString("ERROR: case ") + i + " normalized " + hex(input) + "\n" 541 + " expected " + hex(expect) + "\n" 542 + " static got " + hex(output) ); 543 } 544 } 545} 546 547void BasicNormalizerTest::iterateTest(Normalizer* iter, 548 UnicodeString tests[][3], int length, 549 int outCol) 550{ 551 for (int i = 0; i < length; i++) 552 { 553 UnicodeString& input = tests[i][0]; 554 UnicodeString& expect = tests[i][outCol]; 555 556 logln("Normalizing '" + input + "' (" + hex(input) + ")" ); 557 558 iter->setText(input, status); 559 assertEqual(input, expect, iter, UnicodeString("ERROR: case ") + i + " "); 560 } 561} 562 563void BasicNormalizerTest::assertEqual(const UnicodeString& input, 564 const UnicodeString& expected, 565 Normalizer* iter, 566 const UnicodeString& errPrefix) 567{ 568 UnicodeString result; 569 570 for (UChar32 ch = iter->first(); ch != iter->DONE; ch = iter->next()) { 571 result += ch; 572 } 573 if (result != expected) { 574 dataerrln(errPrefix + "normalized " + hex(input) + "\n" 575 + " expected " + hex(expected) + "\n" 576 + " iterate got " + hex(result) ); 577 } 578} 579 580// helper class for TestPreviousNext() 581// simple UTF-32 character iterator 582class UChar32Iterator { 583public: 584 UChar32Iterator(const UChar32 *text, int32_t len, int32_t index) : 585 s(text), length(len), i(index) {} 586 587 UChar32 current() { 588 if(i<length) { 589 return s[i]; 590 } else { 591 return 0xffff; 592 } 593 } 594 595 UChar32 next() { 596 if(i<length) { 597 return s[i++]; 598 } else { 599 return 0xffff; 600 } 601 } 602 603 UChar32 previous() { 604 if(i>0) { 605 return s[--i]; 606 } else { 607 return 0xffff; 608 } 609 } 610 611 int32_t getIndex() { 612 return i; 613 } 614private: 615 const UChar32 *s; 616 int32_t length, i; 617}; 618 619void 620BasicNormalizerTest::TestPreviousNext(const UChar *src, int32_t srcLength, 621 const UChar32 *expect, int32_t expectLength, 622 const int32_t *expectIndex, // its length=expectLength+1 623 int32_t srcMiddle, int32_t expectMiddle, 624 const char *moves, 625 UNormalizationMode mode, 626 const char *name) { 627 // iterators 628 Normalizer iter(src, srcLength, mode); 629 630 // test getStaticClassID and getDynamicClassID 631 if(iter.getDynamicClassID() != Normalizer::getStaticClassID()) { 632 errln("getStaticClassID != getDynamicClassID for Normalizer."); 633 } 634 635 UChar32Iterator iter32(expect, expectLength, expectMiddle); 636 637 UChar32 c1, c2; 638 char m; 639 640 // initially set the indexes into the middle of the strings 641 iter.setIndexOnly(srcMiddle); 642 643 // move around and compare the iteration code points with 644 // the expected ones 645 const char *move=moves; 646 while((m=*move++)!=0) { 647 if(m=='-') { 648 c1=iter.previous(); 649 c2=iter32.previous(); 650 } else if(m=='0') { 651 c1=iter.current(); 652 c2=iter32.current(); 653 } else /* m=='+' */ { 654 c1=iter.next(); 655 c2=iter32.next(); 656 } 657 658 // compare results 659 if(c1!=c2) { 660 // copy the moves until the current (m) move, and terminate 661 char history[64]; 662 uprv_strcpy(history, moves); 663 history[move-moves]=0; 664 dataerrln("error: mismatch in Normalizer iteration (%s) at %s: " 665 "got c1=U+%04lx != expected c2=U+%04lx", 666 name, history, c1, c2); 667 break; 668 } 669 670 // compare indexes 671 if(iter.getIndex()!=expectIndex[iter32.getIndex()]) { 672 // copy the moves until the current (m) move, and terminate 673 char history[64]; 674 uprv_strcpy(history, moves); 675 history[move-moves]=0; 676 errln("error: index mismatch in Normalizer iteration (%s) at %s: " 677 "Normalizer index %ld expected %ld\n", 678 name, history, iter.getIndex(), expectIndex[iter32.getIndex()]); 679 break; 680 } 681 } 682} 683 684void 685BasicNormalizerTest::TestPreviousNext() { 686 // src and expect strings 687 static const UChar src[]={ 688 UTF16_LEAD(0x2f999), UTF16_TRAIL(0x2f999), 689 UTF16_LEAD(0x1d15f), UTF16_TRAIL(0x1d15f), 690 0xc4, 691 0x1ed0 692 }; 693 static const UChar32 expect[]={ 694 0x831d, 695 0x1d158, 0x1d165, 696 0x41, 0x308, 697 0x4f, 0x302, 0x301 698 }; 699 700 // expected src indexes corresponding to expect indexes 701 static const int32_t expectIndex[]={ 702 0, 703 2, 2, 704 4, 4, 705 5, 5, 5, 706 6 // behind last character 707 }; 708 709 // src and expect strings for regression test for j2911 710 static const UChar src_j2911[]={ 711 UTF16_LEAD(0x2f999), UTF16_TRAIL(0x2f999), 712 0xdd00, 0xd900, // unpaired surrogates - regression test for j2911 713 0xc4, 714 0x4f, 0x302, 0x301 715 }; 716 static const UChar32 expect_j2911[]={ 717 0x831d, 718 0xdd00, 0xd900, // unpaired surrogates - regression test for j2911 719 0xc4, 720 0x1ed0 721 }; 722 723 // expected src indexes corresponding to expect indexes 724 static const int32_t expectIndex_j2911[]={ 725 0, 726 2, 3, 727 4, 728 5, 729 8 // behind last character 730 }; 731 732 // initial indexes into the src and expect strings 733 // for both sets of test data 734 enum { 735 SRC_MIDDLE=4, 736 EXPECT_MIDDLE=3, 737 SRC_MIDDLE_2=2, 738 EXPECT_MIDDLE_2=1 739 }; 740 741 // movement vector 742 // - for previous(), 0 for current(), + for next() 743 // for both sets of test data 744 static const char *const moves="0+0+0--0-0-+++0--+++++++0--------"; 745 746 TestPreviousNext(src, LENGTHOF(src), 747 expect, LENGTHOF(expect), 748 expectIndex, 749 SRC_MIDDLE, EXPECT_MIDDLE, 750 moves, UNORM_NFD, "basic"); 751 752 TestPreviousNext(src_j2911, LENGTHOF(src_j2911), 753 expect_j2911, LENGTHOF(expect_j2911), 754 expectIndex_j2911, 755 SRC_MIDDLE, EXPECT_MIDDLE, 756 moves, UNORM_NFKC, "j2911"); 757 758 // try again from different "middle" indexes 759 TestPreviousNext(src, LENGTHOF(src), 760 expect, LENGTHOF(expect), 761 expectIndex, 762 SRC_MIDDLE_2, EXPECT_MIDDLE_2, 763 moves, UNORM_NFD, "basic_2"); 764 765 TestPreviousNext(src_j2911, LENGTHOF(src_j2911), 766 expect_j2911, LENGTHOF(expect_j2911), 767 expectIndex_j2911, 768 SRC_MIDDLE_2, EXPECT_MIDDLE_2, 769 moves, UNORM_NFKC, "j2911_2"); 770} 771 772void BasicNormalizerTest::TestConcatenate() { 773 static const char *const 774 cases[][4]={ 775 /* mode, left, right, result */ 776 { 777 "C", 778 "re", 779 "\\u0301sum\\u00e9", 780 "r\\u00e9sum\\u00e9" 781 }, 782 { 783 "C", 784 "a\\u1100", 785 "\\u1161bcdefghijk", 786 "a\\uac00bcdefghijk" 787 }, 788 /* ### TODO: add more interesting cases */ 789 { 790 "D", 791 "\\u03B1\\u0345", 792 "\\u0C4D\\U000110BA\\U0001D169", 793 "\\u03B1\\U0001D169\\U000110BA\\u0C4D\\u0345" 794 } 795 }; 796 797 UnicodeString left, right, expect, result, r; 798 UErrorCode errorCode; 799 UNormalizationMode mode; 800 int32_t i; 801 802 /* test concatenation */ 803 for(i=0; i<(int32_t)(sizeof(cases)/sizeof(cases[0])); ++i) { 804 switch(*cases[i][0]) { 805 case 'C': mode=UNORM_NFC; break; 806 case 'D': mode=UNORM_NFD; break; 807 case 'c': mode=UNORM_NFKC; break; 808 case 'd': mode=UNORM_NFKD; break; 809 default: mode=UNORM_NONE; break; 810 } 811 812 left=UnicodeString(cases[i][1], "").unescape(); 813 right=UnicodeString(cases[i][2], "").unescape(); 814 expect=UnicodeString(cases[i][3], "").unescape(); 815 816 //result=r=UnicodeString(); 817 errorCode=U_ZERO_ERROR; 818 819 r=Normalizer::concatenate(left, right, result, mode, 0, errorCode); 820 if(U_FAILURE(errorCode) || /*result!=r ||*/ result!=expect) { 821 dataerrln("error in Normalizer::concatenate(), cases[] fails with "+ 822 UnicodeString(u_errorName(errorCode))+", result==expect: expected: "+ 823 hex(expect)+" =========> got: " + hex(result)); 824 } 825 } 826 827 /* test error cases */ 828 829 /* left.getBuffer()==result.getBuffer() */ 830 result=r=expect=UnicodeString("zz", ""); 831 errorCode=U_UNEXPECTED_TOKEN; 832 r=Normalizer::concatenate(left, right, result, mode, 0, errorCode); 833 if(errorCode!=U_UNEXPECTED_TOKEN || result!=r || !result.isBogus()) { 834 errln("error in Normalizer::concatenate(), violates UErrorCode protocol"); 835 } 836 837 left.setToBogus(); 838 errorCode=U_ZERO_ERROR; 839 r=Normalizer::concatenate(left, right, result, mode, 0, errorCode); 840 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || result!=r || !result.isBogus()) { 841 errln("error in Normalizer::concatenate(), does not detect left.isBogus()"); 842 } 843} 844 845// reference implementation of Normalizer::compare 846static int32_t 847ref_norm_compare(const UnicodeString &s1, const UnicodeString &s2, uint32_t options, UErrorCode &errorCode) { 848 UnicodeString r1, r2, t1, t2; 849 int32_t normOptions=(int32_t)(options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT); 850 851 if(options&U_COMPARE_IGNORE_CASE) { 852 Normalizer::decompose(s1, FALSE, normOptions, r1, errorCode); 853 Normalizer::decompose(s2, FALSE, normOptions, r2, errorCode); 854 855 r1.foldCase(options); 856 r2.foldCase(options); 857 } else { 858 r1=s1; 859 r2=s2; 860 } 861 862 Normalizer::decompose(r1, FALSE, normOptions, t1, errorCode); 863 Normalizer::decompose(r2, FALSE, normOptions, t2, errorCode); 864 865 if(options&U_COMPARE_CODE_POINT_ORDER) { 866 return t1.compareCodePointOrder(t2); 867 } else { 868 return t1.compare(t2); 869 } 870} 871 872// test wrapper for Normalizer::compare, sets UNORM_INPUT_IS_FCD appropriately 873static int32_t 874_norm_compare(const UnicodeString &s1, const UnicodeString &s2, uint32_t options, UErrorCode &errorCode) { 875 int32_t normOptions=(int32_t)(options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT); 876 877 if( UNORM_YES==Normalizer::quickCheck(s1, UNORM_FCD, normOptions, errorCode) && 878 UNORM_YES==Normalizer::quickCheck(s2, UNORM_FCD, normOptions, errorCode)) { 879 options|=UNORM_INPUT_IS_FCD; 880 } 881 882 return Normalizer::compare(s1, s2, options, errorCode); 883} 884 885// reference implementation of UnicodeString::caseCompare 886static int32_t 887ref_case_compare(const UnicodeString &s1, const UnicodeString &s2, uint32_t options) { 888 UnicodeString t1, t2; 889 890 t1=s1; 891 t2=s2; 892 893 t1.foldCase(options); 894 t2.foldCase(options); 895 896 if(options&U_COMPARE_CODE_POINT_ORDER) { 897 return t1.compareCodePointOrder(t2); 898 } else { 899 return t1.compare(t2); 900 } 901} 902 903// reduce an integer to -1/0/1 904static inline int32_t 905_sign(int32_t value) { 906 if(value==0) { 907 return 0; 908 } else { 909 return (value>>31)|1; 910 } 911} 912 913static const char * 914_signString(int32_t value) { 915 if(value<0) { 916 return "<0"; 917 } else if(value==0) { 918 return "=0"; 919 } else /* value>0 */ { 920 return ">0"; 921 } 922} 923 924void 925BasicNormalizerTest::TestCompare() { 926 // test Normalizer::compare and unorm_compare (thinly wrapped by the former) 927 // by comparing it with its semantic equivalent 928 // since we trust the pieces, this is sufficient 929 930 // test each string with itself and each other 931 // each time with all options 932 static const char *const 933 strings[]={ 934 // some cases from NormalizationTest.txt 935 // 0..3 936 "D\\u031B\\u0307\\u0323", 937 "\\u1E0C\\u031B\\u0307", 938 "D\\u031B\\u0323\\u0307", 939 "d\\u031B\\u0323\\u0307", 940 941 // 4..6 942 "\\u00E4", 943 "a\\u0308", 944 "A\\u0308", 945 946 // Angstrom sign = A ring 947 // 7..10 948 "\\u212B", 949 "\\u00C5", 950 "A\\u030A", 951 "a\\u030A", 952 953 // 11.14 954 "a\\u059A\\u0316\\u302A\\u032Fb", 955 "a\\u302A\\u0316\\u032F\\u059Ab", 956 "a\\u302A\\u0316\\u032F\\u059Ab", 957 "A\\u059A\\u0316\\u302A\\u032Fb", 958 959 // from ICU case folding tests 960 // 15..20 961 "A\\u00df\\u00b5\\ufb03\\U0001040c\\u0131", 962 "ass\\u03bcffi\\U00010434i", 963 "\\u0061\\u0042\\u0131\\u03a3\\u00df\\ufb03\\ud93f\\udfff", 964 "\\u0041\\u0062\\u0069\\u03c3\\u0073\\u0053\\u0046\\u0066\\u0049\\ud93f\\udfff", 965 "\\u0041\\u0062\\u0131\\u03c3\\u0053\\u0073\\u0066\\u0046\\u0069\\ud93f\\udfff", 966 "\\u0041\\u0062\\u0069\\u03c3\\u0073\\u0053\\u0046\\u0066\\u0049\\ud93f\\udffd", 967 968 // U+d800 U+10001 see implementation comment in unorm_cmpEquivFold 969 // vs. U+10000 at bottom - code point order 970 // 21..22 971 "\\ud800\\ud800\\udc01", 972 "\\ud800\\udc00", 973 974 // other code point order tests from ustrtest.cpp 975 // 23..31 976 "\\u20ac\\ud801", 977 "\\u20ac\\ud800\\udc00", 978 "\\ud800", 979 "\\ud800\\uff61", 980 "\\udfff", 981 "\\uff61\\udfff", 982 "\\uff61\\ud800\\udc02", 983 "\\ud800\\udc02", 984 "\\ud84d\\udc56", 985 986 // long strings, see cnormtst.c/TestNormCoverage() 987 // equivalent if case-insensitive 988 // 32..33 989 "\\uAD8B\\uAD8B\\uAD8B\\uAD8B" 990 "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e" 991 "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e" 992 "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e" 993 "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e" 994 "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e" 995 "aaaaaaaaaaaaaaaaaazzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" 996 "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" 997 "ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc" 998 "ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd" 999 "\\uAD8B\\uAD8B\\uAD8B\\uAD8B" 1000 "d\\u031B\\u0307\\u0323", 1001 1002 "\\u1100\\u116f\\u11aa\\uAD8B\\uAD8B\\u1100\\u116f\\u11aa" 1003 "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e" 1004 "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e" 1005 "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e" 1006 "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e" 1007 "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e" 1008 "aaaaaaaaaaAAAAAAAAZZZZZZZZZZZZZZZZzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" 1009 "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" 1010 "ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc" 1011 "ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd" 1012 "\\u1100\\u116f\\u11aa\\uAD8B\\uAD8B\\u1100\\u116f\\u11aa" 1013 "\\u1E0C\\u031B\\u0307", 1014 1015 // some strings that may make a difference whether the compare function 1016 // case-folds or decomposes first 1017 // 34..41 1018 "\\u0360\\u0345\\u0334", 1019 "\\u0360\\u03b9\\u0334", 1020 1021 "\\u0360\\u1f80\\u0334", 1022 "\\u0360\\u03b1\\u0313\\u03b9\\u0334", 1023 1024 "\\u0360\\u1ffc\\u0334", 1025 "\\u0360\\u03c9\\u03b9\\u0334", 1026 1027 "a\\u0360\\u0345\\u0360\\u0345b", 1028 "a\\u0345\\u0360\\u0345\\u0360b", 1029 1030 // interesting cases for canonical caseless match with turkic i handling 1031 // 42..43 1032 "\\u00cc", 1033 "\\u0069\\u0300", 1034 1035 // strings with post-Unicode 3.2 normalization or normalization corrections 1036 // 44..45 1037 "\\u00e4\\u193b\\U0002f868", 1038 "\\u0061\\u193b\\u0308\\u36fc", 1039 1040 // empty string 1041 // 46 1042 "" 1043 }; 1044 1045 UnicodeString s[100]; // at least as many items as in strings[] ! 1046 1047 // all combinations of options 1048 // UNORM_INPUT_IS_FCD is set automatically if both input strings fulfill FCD conditions 1049 // set UNORM_UNICODE_3_2 in one additional combination 1050 static const struct { 1051 uint32_t options; 1052 const char *name; 1053 } opt[]={ 1054 { 0, "default" }, 1055 { U_COMPARE_CODE_POINT_ORDER, "c.p. order" }, 1056 { U_COMPARE_IGNORE_CASE, "ignore case" }, 1057 { U_COMPARE_CODE_POINT_ORDER|U_COMPARE_IGNORE_CASE, "c.p. order & ignore case" }, 1058 { U_COMPARE_IGNORE_CASE|U_FOLD_CASE_EXCLUDE_SPECIAL_I, "ignore case & special i" }, 1059 { U_COMPARE_CODE_POINT_ORDER|U_COMPARE_IGNORE_CASE|U_FOLD_CASE_EXCLUDE_SPECIAL_I, "c.p. order & ignore case & special i" }, 1060 { UNORM_UNICODE_3_2<<UNORM_COMPARE_NORM_OPTIONS_SHIFT, "Unicode 3.2" } 1061 }; 1062 1063 int32_t i, j, k, count=LENGTHOF(strings); 1064 int32_t result, refResult; 1065 1066 UErrorCode errorCode; 1067 1068 // create the UnicodeStrings 1069 for(i=0; i<count; ++i) { 1070 s[i]=UnicodeString(strings[i], "").unescape(); 1071 } 1072 1073 // test them each with each other 1074 for(i=0; i<count; ++i) { 1075 for(j=i; j<count; ++j) { 1076 for(k=0; k<LENGTHOF(opt); ++k) { 1077 // test Normalizer::compare 1078 errorCode=U_ZERO_ERROR; 1079 result=_norm_compare(s[i], s[j], opt[k].options, errorCode); 1080 refResult=ref_norm_compare(s[i], s[j], opt[k].options, errorCode); 1081 if(_sign(result)!=_sign(refResult)) { 1082 errln("Normalizer::compare(%d, %d, %s)%s should be %s %s", 1083 i, j, opt[k].name, _signString(result), _signString(refResult), 1084 U_SUCCESS(errorCode) ? "" : u_errorName(errorCode)); 1085 } 1086 1087 // test UnicodeString::caseCompare - same internal implementation function 1088 if(opt[k].options&U_COMPARE_IGNORE_CASE) { 1089 errorCode=U_ZERO_ERROR; 1090 result=s[i].caseCompare(s[j], opt[k].options); 1091 refResult=ref_case_compare(s[i], s[j], opt[k].options); 1092 if(_sign(result)!=_sign(refResult)) { 1093 errln("UniStr::caseCompare(%d, %d, %s)%s should be %s %s", 1094 i, j, opt[k].name, _signString(result), _signString(refResult), 1095 U_SUCCESS(errorCode) ? "" : u_errorName(errorCode)); 1096 } 1097 } 1098 } 1099 } 1100 } 1101 1102 // test cases with i and I to make sure Turkic works 1103 static const UChar iI[]={ 0x49, 0x69, 0x130, 0x131 }; 1104 USerializedSet sset; 1105 UnicodeSet set; 1106 1107 UnicodeString s1, s2; 1108 UChar32 start, end; 1109 1110 // collect all sets into one for contiguous output 1111 for(i=0; i<LENGTHOF(iI); ++i) { 1112 if(unorm_getCanonStartSet(iI[i], &sset)) { 1113 count=uset_getSerializedRangeCount(&sset); 1114 for(j=0; j<count; ++j) { 1115 uset_getSerializedRange(&sset, j, &start, &end); 1116 set.add(start, end); 1117 } 1118 } 1119 } 1120 1121 // test all of these precomposed characters 1122 UnicodeSetIterator it(set); 1123 while(it.nextRange() && !it.isString()) { 1124 start=it.getCodepoint(); 1125 end=it.getCodepointEnd(); 1126 while(start<=end) { 1127 s1.setTo(start); 1128 errorCode=U_ZERO_ERROR; 1129 Normalizer::decompose(s1, FALSE, 0, s2, errorCode); 1130 if(U_FAILURE(errorCode)) { 1131 dataerrln("Normalizer::decompose(U+%04x) failed: %s", start, u_errorName(errorCode)); 1132 return; 1133 } 1134 1135 for(k=0; k<LENGTHOF(opt); ++k) { 1136 // test Normalizer::compare 1137 errorCode=U_ZERO_ERROR; 1138 result=_norm_compare(s1, s2, opt[k].options, errorCode); 1139 refResult=ref_norm_compare(s1, s2, opt[k].options, errorCode); 1140 if(_sign(result)!=_sign(refResult)) { 1141 errln("Normalizer::compare(U+%04x with its NFD, %s)%s should be %s %s", 1142 start, opt[k].name, _signString(result), _signString(refResult), 1143 U_SUCCESS(errorCode) ? "" : u_errorName(errorCode)); 1144 } 1145 1146 // test UnicodeString::caseCompare - same internal implementation function 1147 if(opt[k].options&U_COMPARE_IGNORE_CASE) { 1148 errorCode=U_ZERO_ERROR; 1149 result=s1.caseCompare(s2, opt[k].options); 1150 refResult=ref_case_compare(s1, s2, opt[k].options); 1151 if(_sign(result)!=_sign(refResult)) { 1152 errln("UniStr::caseCompare(U+%04x with its NFD, %s)%s should be %s %s", 1153 start, opt[k].name, _signString(result), _signString(refResult), 1154 U_SUCCESS(errorCode) ? "" : u_errorName(errorCode)); 1155 } 1156 } 1157 } 1158 1159 ++start; 1160 } 1161 } 1162} 1163 1164// verify that case-folding does not un-FCD strings 1165int32_t 1166BasicNormalizerTest::countFoldFCDExceptions(uint32_t foldingOptions) { 1167 UnicodeString s, fold, d; 1168 UChar32 c; 1169 int32_t count; 1170 uint8_t cc, trailCC, foldCC, foldTrailCC; 1171 UNormalizationCheckResult qcResult; 1172 int8_t category; 1173 UBool isNFD; 1174 UErrorCode errorCode; 1175 1176 logln("Test if case folding may un-FCD a string (folding options %04lx)", foldingOptions); 1177 1178 count=0; 1179 for(c=0; c<=0x10ffff; ++c) { 1180 errorCode = U_ZERO_ERROR; 1181 category=u_charType(c); 1182 if(category==U_UNASSIGNED) { 1183 continue; // skip unassigned code points 1184 } 1185 if(c==0xac00) { 1186 c=0xd7a3; // skip Hangul - no case folding there 1187 continue; 1188 } 1189 // skip Han blocks - no case folding there either 1190 if(c==0x3400) { 1191 c=0x4db5; 1192 continue; 1193 } 1194 if(c==0x4e00) { 1195 c=0x9fa5; 1196 continue; 1197 } 1198 if(c==0x20000) { 1199 c=0x2a6d6; 1200 continue; 1201 } 1202 1203 s.setTo(c); 1204 1205 // get leading and trailing cc for c 1206 Normalizer::decompose(s, FALSE, 0, d, errorCode); 1207 isNFD= s==d; 1208 cc=u_getCombiningClass(d.char32At(0)); 1209 trailCC=u_getCombiningClass(d.char32At(d.length()-1)); 1210 1211 // get leading and trailing cc for the case-folding of c 1212 s.foldCase(foldingOptions); 1213 Normalizer::decompose(s, FALSE, 0, d, errorCode); 1214 foldCC=u_getCombiningClass(d.char32At(0)); 1215 foldTrailCC=u_getCombiningClass(d.char32At(d.length()-1)); 1216 1217 qcResult=Normalizer::quickCheck(s, UNORM_FCD, errorCode); 1218 1219 if (U_FAILURE(errorCode)) { 1220 ++count; 1221 dataerrln("U+%04lx: Failed with error %s", u_errorName(errorCode)); 1222 } 1223 1224 // bad: 1225 // - character maps to empty string: adjacent characters may then need reordering 1226 // - folding has different leading/trailing cc's, and they don't become just 0 1227 // - folding itself is not FCD 1228 if( qcResult!=UNORM_YES || 1229 s.isEmpty() || 1230 (cc!=foldCC && foldCC!=0) || (trailCC!=foldTrailCC && foldTrailCC!=0) 1231 ) { 1232 ++count; 1233 dataerrln("U+%04lx: case-folding may un-FCD a string (folding options %04lx)", c, foldingOptions); 1234 dataerrln(" cc %02x trailCC %02x foldCC(U+%04lx) %02x foldTrailCC(U+%04lx) %02x quickCheck(folded)=%d", cc, trailCC, d.char32At(0), foldCC, d.char32At(d.length()-1), foldTrailCC, qcResult); 1235 continue; 1236 } 1237 1238 // also bad: 1239 // if a code point is in NFD but its case folding is not, then 1240 // unorm_compare will also fail 1241 if(isNFD && UNORM_YES!=Normalizer::quickCheck(s, UNORM_NFD, errorCode)) { 1242 ++count; 1243 errln("U+%04lx: case-folding un-NFDs this character (folding options %04lx)", c, foldingOptions); 1244 } 1245 } 1246 1247 logln("There are %ld code points for which case-folding may un-FCD a string (folding options %04lx)", count, foldingOptions); 1248 return count; 1249} 1250 1251void 1252BasicNormalizerTest::FindFoldFCDExceptions() { 1253 int32_t count; 1254 1255 count=countFoldFCDExceptions(0); 1256 count+=countFoldFCDExceptions(U_FOLD_CASE_EXCLUDE_SPECIAL_I); 1257 if(count>0) { 1258 /* 1259 * If case-folding un-FCDs any strings, then unorm_compare() must be 1260 * re-implemented. 1261 * It currently assumes that one can check for FCD then case-fold 1262 * and then still have FCD strings for raw decomposition without reordering. 1263 */ 1264 dataerrln("error: There are %ld code points for which case-folding may un-FCD a string for all folding options.\n" 1265 "See comment in BasicNormalizerTest::FindFoldFCDExceptions()!", count); 1266 } 1267} 1268 1269/* 1270 * Hardcoded "NF* Skippable" sets, generated from 1271 * Mark Davis' com.ibm.text.UCD.NFSkippable (see ICU4J CVS, module unicodetools). 1272 * Run com.ibm.text.UCD.Main with the option NFSkippable. 1273 * 1274 * Must be updated for each Unicode version. 1275 */ 1276static void 1277initExpectedSkippables(UnicodeSet skipSets[UNORM_MODE_COUNT]) { 1278 UErrorCode errorCode=U_ZERO_ERROR; 1279 1280 skipSets[UNORM_NFD].applyPattern(UnicodeString( 1281 "[^\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6\\u00D9-\\u00DD" 1282 "\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6\\u00F9-\\u00FD" 1283 "\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130\\u0134-\\u0137" 1284 "\\u0139-\\u013E\\u0143-\\u0148\\u014C-\\u0151\\u0154-\\u0165" 1285 "\\u0168-\\u017E\\u01A0\\u01A1\\u01AF\\u01B0\\u01CD-\\u01DC" 1286 "\\u01DE-\\u01E3\\u01E6-\\u01F0\\u01F4\\u01F5\\u01F8-\\u021B" 1287 "\\u021E\\u021F\\u0226-\\u0233\\u0300-\\u034E\\u0350-\\u036F" 1288 "\\u0374\\u037E\\u0385-\\u038A\\u038C\\u038E-\\u0390\\u03AA-" 1289 "\\u03B0\\u03CA-\\u03CE\\u03D3\\u03D4\\u0400\\u0401\\u0403\\u0407" 1290 "\\u040C-\\u040E\\u0419\\u0439\\u0450\\u0451\\u0453\\u0457\\u045C" 1291 "-\\u045E\\u0476\\u0477\\u0483-\\u0487\\u04C1\\u04C2\\u04D0-" 1292 "\\u04D3\\u04D6\\u04D7\\u04DA-\\u04DF\\u04E2-\\u04E7\\u04EA-" 1293 "\\u04F5\\u04F8\\u04F9\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4" 1294 "\\u05C5\\u05C7\\u0610-\\u061A\\u0622-\\u0626\\u064B-\\u065E" 1295 "\\u0670\\u06C0\\u06C2\\u06D3\\u06D6-\\u06DC\\u06DF-\\u06E4" 1296 "\\u06E7\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-" 1297 "\\u07F3\\u0816-\\u0819\\u081B-\\u0823\\u0825-\\u0827\\u0829-" 1298 "\\u082D\\u0929\\u0931\\u0934\\u093C\\u094D\\u0951-\\u0954\\u0958" 1299 "-\\u095F\\u09BC\\u09CB-\\u09CD\\u09DC\\u09DD\\u09DF\\u0A33" 1300 "\\u0A36\\u0A3C\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C" 1301 "\\u0B48\\u0B4B-\\u0B4D\\u0B5C\\u0B5D\\u0B94\\u0BCA-\\u0BCD" 1302 "\\u0C48\\u0C4D\\u0C55\\u0C56\\u0CBC\\u0CC0\\u0CC7\\u0CC8\\u0CCA" 1303 "\\u0CCB\\u0CCD\\u0D4A-\\u0D4D\\u0DCA\\u0DDA\\u0DDC-\\u0DDE" 1304 "\\u0E38-\\u0E3A\\u0E48-\\u0E4B\\u0EB8\\u0EB9\\u0EC8-\\u0ECB" 1305 "\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F43\\u0F4D\\u0F52\\u0F57" 1306 "\\u0F5C\\u0F69\\u0F71-\\u0F76\\u0F78\\u0F7A-\\u0F7D\\u0F80-" 1307 "\\u0F84\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9" 1308 "\\u0FC6\\u1026\\u1037\\u1039\\u103A\\u108D\\u135F\\u1714\\u1734" 1309 "\\u17D2\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1A60\\u1A75" 1310 "-\\u1A7C\\u1A7F\\u1B06\\u1B08\\u1B0A\\u1B0C\\u1B0E\\u1B12\\u1B34" 1311 "\\u1B3B\\u1B3D\\u1B40\\u1B41\\u1B43\\u1B44\\u1B6B-\\u1B73\\u1BAA" 1312 "\\u1C37\\u1CD0-\\u1CD2\\u1CD4-\\u1CE0\\u1CE2-\\u1CE8\\u1CED" 1313 "\\u1DC0-\\u1DE6\\u1DFD-\\u1E99\\u1E9B\\u1EA0-\\u1EF9\\u1F00-" 1314 "\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D\\u1F50-" 1315 "\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D\\u1F80-\\u1FB4" 1316 "\\u1FB6-\\u1FBC\\u1FBE\\u1FC1-\\u1FC4\\u1FC6-\\u1FD3\\u1FD6-" 1317 "\\u1FDB\\u1FDD-\\u1FEF\\u1FF2-\\u1FF4\\u1FF6-\\u1FFD\\u2000" 1318 "\\u2001\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20F0\\u2126\\u212A" 1319 "\\u212B\\u219A\\u219B\\u21AE\\u21CD-\\u21CF\\u2204\\u2209\\u220C" 1320 "\\u2224\\u2226\\u2241\\u2244\\u2247\\u2249\\u2260\\u2262\\u226D-" 1321 "\\u2271\\u2274\\u2275\\u2278\\u2279\\u2280\\u2281\\u2284\\u2285" 1322 "\\u2288\\u2289\\u22AC-\\u22AF\\u22E0-\\u22E3\\u22EA-\\u22ED" 1323 "\\u2329\\u232A\\u2ADC\\u2CEF-\\u2CF1\\u2DE0-\\u2DFF\\u302A-" 1324 "\\u302F\\u304C\\u304E\\u3050\\u3052\\u3054\\u3056\\u3058\\u305A" 1325 "\\u305C\\u305E\\u3060\\u3062\\u3065\\u3067\\u3069\\u3070\\u3071" 1326 "\\u3073\\u3074\\u3076\\u3077\\u3079\\u307A\\u307C\\u307D\\u3094" 1327 "\\u3099\\u309A\\u309E\\u30AC\\u30AE\\u30B0\\u30B2\\u30B4\\u30B6" 1328 "\\u30B8\\u30BA\\u30BC\\u30BE\\u30C0\\u30C2\\u30C5\\u30C7\\u30C9" 1329 "\\u30D0\\u30D1\\u30D3\\u30D4\\u30D6\\u30D7\\u30D9\\u30DA\\u30DC" 1330 "\\u30DD\\u30F4\\u30F7-\\u30FA\\u30FE\\uA66F\\uA67C\\uA67D\\uA6F0" 1331 "\\uA6F1\\uA806\\uA8C4\\uA8E0-\\uA8F1\\uA92B-\\uA92D\\uA953" 1332 "\\uA9B3\\uA9C0\\uAAB0\\uAAB2-\\uAAB4\\uAAB7\\uAAB8\\uAABE\\uAABF" 1333 "\\uAAC1\\uABED\\uAC00-\\uD7A3\\uF900-\\uFA0D\\uFA10\\uFA12" 1334 "\\uFA15-\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A-\\uFA2D" 1335 "\\uFA30-\\uFA6D\\uFA70-\\uFAD9\\uFB1D-\\uFB1F\\uFB2A-\\uFB36" 1336 "\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-" 1337 "\\uFB4E\\uFE20-\\uFE26\\U000101FD\\U00010A0D\\U00010A0F\\U00010A" 1338 "38-\\U00010A3A\\U00010A3F\\U0001109A\\U0001109C\\U000110AB" 1339 "\\U000110B9\\U000110BA\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001" 1340 "D172\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-" 1341 "\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0002" 1342 "F800-\\U0002FA1D]" 1343 , ""), errorCode); 1344 1345 skipSets[UNORM_NFC].applyPattern(UnicodeString( 1346 "[^<->A-PR-Za-pr-z\\u00A8\\u00C0-\\u00CF\\u00D1-\\u00D6\\u00D8-" 1347 "\\u00DD\\u00E0-\\u00EF\\u00F1-\\u00F6\\u00F8-\\u00FD\\u00FF-" 1348 "\\u0103\\u0106-\\u010F\\u0112-\\u0117\\u011A-\\u0121\\u0124" 1349 "\\u0125\\u0128-\\u012D\\u0130\\u0139\\u013A\\u013D\\u013E\\u0143" 1350 "\\u0144\\u0147\\u0148\\u014C-\\u0151\\u0154\\u0155\\u0158-" 1351 "\\u015D\\u0160\\u0161\\u0164\\u0165\\u0168-\\u0171\\u0174-" 1352 "\\u017F\\u01A0\\u01A1\\u01AF\\u01B0\\u01B7\\u01CD-\\u01DC\\u01DE" 1353 "-\\u01E1\\u01E6-\\u01EB\\u01F4\\u01F5\\u01F8-\\u01FB\\u0200-" 1354 "\\u021B\\u021E\\u021F\\u0226-\\u0233\\u0292\\u0300-\\u034E" 1355 "\\u0350-\\u036F\\u0374\\u037E\\u0387\\u0391\\u0395\\u0397\\u0399" 1356 "\\u039F\\u03A1\\u03A5\\u03A9\\u03AC\\u03AE\\u03B1\\u03B5\\u03B7" 1357 "\\u03B9\\u03BF\\u03C1\\u03C5\\u03C9-\\u03CB\\u03CE\\u03D2\\u0406" 1358 "\\u0410\\u0413\\u0415-\\u0418\\u041A\\u041E\\u0423\\u0427\\u042B" 1359 "\\u042D\\u0430\\u0433\\u0435-\\u0438\\u043A\\u043E\\u0443\\u0447" 1360 "\\u044B\\u044D\\u0456\\u0474\\u0475\\u0483-\\u0487\\u04D8\\u04D9" 1361 "\\u04E8\\u04E9\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5" 1362 "\\u05C7\\u0610-\\u061A\\u0622\\u0623\\u0627\\u0648\\u064A-" 1363 "\\u065E\\u0670\\u06C1\\u06D2\\u06D5-\\u06DC\\u06DF-\\u06E4" 1364 "\\u06E7\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-" 1365 "\\u07F3\\u0816-\\u0819\\u081B-\\u0823\\u0825-\\u0827\\u0829-" 1366 "\\u082D\\u0928\\u0930\\u0933\\u093C\\u094D\\u0951-\\u0954\\u0958" 1367 "-\\u095F\\u09BC\\u09BE\\u09C7\\u09CD\\u09D7\\u09DC\\u09DD\\u09DF" 1368 "\\u0A33\\u0A36\\u0A3C\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD" 1369 "\\u0B3C\\u0B3E\\u0B47\\u0B4D\\u0B56\\u0B57\\u0B5C\\u0B5D\\u0B92" 1370 "\\u0BBE\\u0BC6\\u0BC7\\u0BCD\\u0BD7\\u0C46\\u0C4D\\u0C55\\u0C56" 1371 "\\u0CBC\\u0CBF\\u0CC2\\u0CC6\\u0CCA\\u0CCD\\u0CD5\\u0CD6\\u0D3E" 1372 "\\u0D46\\u0D47\\u0D4D\\u0D57\\u0DCA\\u0DCF\\u0DD9\\u0DDC\\u0DDF" 1373 "\\u0E38-\\u0E3A\\u0E48-\\u0E4B\\u0EB8\\u0EB9\\u0EC8-\\u0ECB" 1374 "\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F43\\u0F4D\\u0F52\\u0F57" 1375 "\\u0F5C\\u0F69\\u0F71-\\u0F76\\u0F78\\u0F7A-\\u0F7D\\u0F80-" 1376 "\\u0F84\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9" 1377 "\\u0FC6\\u1025\\u102E\\u1037\\u1039\\u103A\\u108D\\u1100-\\u1112" 1378 "\\u1161-\\u1175\\u11A8-\\u11C2\\u135F\\u1714\\u1734\\u17D2" 1379 "\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1A60\\u1A75-" 1380 "\\u1A7C\\u1A7F\\u1B05\\u1B07\\u1B09\\u1B0B\\u1B0D\\u1B11\\u1B34" 1381 "\\u1B35\\u1B3A\\u1B3C\\u1B3E\\u1B3F\\u1B42\\u1B44\\u1B6B-\\u1B73" 1382 "\\u1BAA\\u1C37\\u1CD0-\\u1CD2\\u1CD4-\\u1CE0\\u1CE2-\\u1CE8" 1383 "\\u1CED\\u1DC0-\\u1DE6\\u1DFD-\\u1E03\\u1E0A-\\u1E0F\\u1E12-" 1384 "\\u1E1B\\u1E20-\\u1E27\\u1E2A-\\u1E41\\u1E44-\\u1E53\\u1E58-" 1385 "\\u1E7D\\u1E80-\\u1E87\\u1E8E-\\u1E91\\u1E96-\\u1E99\\u1EA0-" 1386 "\\u1EF3\\u1EF6-\\u1EF9\\u1F00-\\u1F11\\u1F18\\u1F19\\u1F20-" 1387 "\\u1F31\\u1F38\\u1F39\\u1F40\\u1F41\\u1F48\\u1F49\\u1F50\\u1F51" 1388 "\\u1F59\\u1F60-\\u1F71\\u1F73-\\u1F75\\u1F77\\u1F79\\u1F7B-" 1389 "\\u1F7D\\u1F80\\u1F81\\u1F88\\u1F89\\u1F90\\u1F91\\u1F98\\u1F99" 1390 "\\u1FA0\\u1FA1\\u1FA8\\u1FA9\\u1FB3\\u1FB6\\u1FBB\\u1FBC\\u1FBE" 1391 "\\u1FBF\\u1FC3\\u1FC6\\u1FC9\\u1FCB\\u1FCC\\u1FD3\\u1FDB\\u1FE3" 1392 "\\u1FEB\\u1FEE\\u1FEF\\u1FF3\\u1FF6\\u1FF9\\u1FFB-\\u1FFE\\u2000" 1393 "\\u2001\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20F0\\u2126\\u212A" 1394 "\\u212B\\u2190\\u2192\\u2194\\u21D0\\u21D2\\u21D4\\u2203\\u2208" 1395 "\\u220B\\u2223\\u2225\\u223C\\u2243\\u2245\\u2248\\u224D\\u2261" 1396 "\\u2264\\u2265\\u2272\\u2273\\u2276\\u2277\\u227A-\\u227D\\u2282" 1397 "\\u2283\\u2286\\u2287\\u2291\\u2292\\u22A2\\u22A8\\u22A9\\u22AB" 1398 "\\u22B2-\\u22B5\\u2329\\u232A\\u2ADC\\u2CEF-\\u2CF1\\u2DE0-" 1399 "\\u2DFF\\u302A-\\u302F\\u3046\\u304B\\u304D\\u304F\\u3051\\u3053" 1400 "\\u3055\\u3057\\u3059\\u305B\\u305D\\u305F\\u3061\\u3064\\u3066" 1401 "\\u3068\\u306F\\u3072\\u3075\\u3078\\u307B\\u3099\\u309A\\u309D" 1402 "\\u30A6\\u30AB\\u30AD\\u30AF\\u30B1\\u30B3\\u30B5\\u30B7\\u30B9" 1403 "\\u30BB\\u30BD\\u30BF\\u30C1\\u30C4\\u30C6\\u30C8\\u30CF\\u30D2" 1404 "\\u30D5\\u30D8\\u30DB\\u30EF-\\u30F2\\u30FD\\uA66F\\uA67C\\uA67D" 1405 "\\uA6F0\\uA6F1\\uA806\\uA8C4\\uA8E0-\\uA8F1\\uA92B-\\uA92D" 1406 "\\uA953\\uA9B3\\uA9C0\\uAAB0\\uAAB2-\\uAAB4\\uAAB7\\uAAB8\\uAABE" 1407 "\\uAABF\\uAAC1\\uABED\\uAC00\\uAC1C\\uAC38\\uAC54\\uAC70\\uAC8C" 1408 "\\uACA8\\uACC4\\uACE0\\uACFC\\uAD18\\uAD34\\uAD50\\uAD6C\\uAD88" 1409 "\\uADA4\\uADC0\\uADDC\\uADF8\\uAE14\\uAE30\\uAE4C\\uAE68\\uAE84" 1410 "\\uAEA0\\uAEBC\\uAED8\\uAEF4\\uAF10\\uAF2C\\uAF48\\uAF64\\uAF80" 1411 "\\uAF9C\\uAFB8\\uAFD4\\uAFF0\\uB00C\\uB028\\uB044\\uB060\\uB07C" 1412 "\\uB098\\uB0B4\\uB0D0\\uB0EC\\uB108\\uB124\\uB140\\uB15C\\uB178" 1413 "\\uB194\\uB1B0\\uB1CC\\uB1E8\\uB204\\uB220\\uB23C\\uB258\\uB274" 1414 "\\uB290\\uB2AC\\uB2C8\\uB2E4\\uB300\\uB31C\\uB338\\uB354\\uB370" 1415 "\\uB38C\\uB3A8\\uB3C4\\uB3E0\\uB3FC\\uB418\\uB434\\uB450\\uB46C" 1416 "\\uB488\\uB4A4\\uB4C0\\uB4DC\\uB4F8\\uB514\\uB530\\uB54C\\uB568" 1417 "\\uB584\\uB5A0\\uB5BC\\uB5D8\\uB5F4\\uB610\\uB62C\\uB648\\uB664" 1418 "\\uB680\\uB69C\\uB6B8\\uB6D4\\uB6F0\\uB70C\\uB728\\uB744\\uB760" 1419 "\\uB77C\\uB798\\uB7B4\\uB7D0\\uB7EC\\uB808\\uB824\\uB840\\uB85C" 1420 "\\uB878\\uB894\\uB8B0\\uB8CC\\uB8E8\\uB904\\uB920\\uB93C\\uB958" 1421 "\\uB974\\uB990\\uB9AC\\uB9C8\\uB9E4\\uBA00\\uBA1C\\uBA38\\uBA54" 1422 "\\uBA70\\uBA8C\\uBAA8\\uBAC4\\uBAE0\\uBAFC\\uBB18\\uBB34\\uBB50" 1423 "\\uBB6C\\uBB88\\uBBA4\\uBBC0\\uBBDC\\uBBF8\\uBC14\\uBC30\\uBC4C" 1424 "\\uBC68\\uBC84\\uBCA0\\uBCBC\\uBCD8\\uBCF4\\uBD10\\uBD2C\\uBD48" 1425 "\\uBD64\\uBD80\\uBD9C\\uBDB8\\uBDD4\\uBDF0\\uBE0C\\uBE28\\uBE44" 1426 "\\uBE60\\uBE7C\\uBE98\\uBEB4\\uBED0\\uBEEC\\uBF08\\uBF24\\uBF40" 1427 "\\uBF5C\\uBF78\\uBF94\\uBFB0\\uBFCC\\uBFE8\\uC004\\uC020\\uC03C" 1428 "\\uC058\\uC074\\uC090\\uC0AC\\uC0C8\\uC0E4\\uC100\\uC11C\\uC138" 1429 "\\uC154\\uC170\\uC18C\\uC1A8\\uC1C4\\uC1E0\\uC1FC\\uC218\\uC234" 1430 "\\uC250\\uC26C\\uC288\\uC2A4\\uC2C0\\uC2DC\\uC2F8\\uC314\\uC330" 1431 "\\uC34C\\uC368\\uC384\\uC3A0\\uC3BC\\uC3D8\\uC3F4\\uC410\\uC42C" 1432 "\\uC448\\uC464\\uC480\\uC49C\\uC4B8\\uC4D4\\uC4F0\\uC50C\\uC528" 1433 "\\uC544\\uC560\\uC57C\\uC598\\uC5B4\\uC5D0\\uC5EC\\uC608\\uC624" 1434 "\\uC640\\uC65C\\uC678\\uC694\\uC6B0\\uC6CC\\uC6E8\\uC704\\uC720" 1435 "\\uC73C\\uC758\\uC774\\uC790\\uC7AC\\uC7C8\\uC7E4\\uC800\\uC81C" 1436 "\\uC838\\uC854\\uC870\\uC88C\\uC8A8\\uC8C4\\uC8E0\\uC8FC\\uC918" 1437 "\\uC934\\uC950\\uC96C\\uC988\\uC9A4\\uC9C0\\uC9DC\\uC9F8\\uCA14" 1438 "\\uCA30\\uCA4C\\uCA68\\uCA84\\uCAA0\\uCABC\\uCAD8\\uCAF4\\uCB10" 1439 "\\uCB2C\\uCB48\\uCB64\\uCB80\\uCB9C\\uCBB8\\uCBD4\\uCBF0\\uCC0C" 1440 "\\uCC28\\uCC44\\uCC60\\uCC7C\\uCC98\\uCCB4\\uCCD0\\uCCEC\\uCD08" 1441 "\\uCD24\\uCD40\\uCD5C\\uCD78\\uCD94\\uCDB0\\uCDCC\\uCDE8\\uCE04" 1442 "\\uCE20\\uCE3C\\uCE58\\uCE74\\uCE90\\uCEAC\\uCEC8\\uCEE4\\uCF00" 1443 "\\uCF1C\\uCF38\\uCF54\\uCF70\\uCF8C\\uCFA8\\uCFC4\\uCFE0\\uCFFC" 1444 "\\uD018\\uD034\\uD050\\uD06C\\uD088\\uD0A4\\uD0C0\\uD0DC\\uD0F8" 1445 "\\uD114\\uD130\\uD14C\\uD168\\uD184\\uD1A0\\uD1BC\\uD1D8\\uD1F4" 1446 "\\uD210\\uD22C\\uD248\\uD264\\uD280\\uD29C\\uD2B8\\uD2D4\\uD2F0" 1447 "\\uD30C\\uD328\\uD344\\uD360\\uD37C\\uD398\\uD3B4\\uD3D0\\uD3EC" 1448 "\\uD408\\uD424\\uD440\\uD45C\\uD478\\uD494\\uD4B0\\uD4CC\\uD4E8" 1449 "\\uD504\\uD520\\uD53C\\uD558\\uD574\\uD590\\uD5AC\\uD5C8\\uD5E4" 1450 "\\uD600\\uD61C\\uD638\\uD654\\uD670\\uD68C\\uD6A8\\uD6C4\\uD6E0" 1451 "\\uD6FC\\uD718\\uD734\\uD750\\uD76C\\uD788\\uF900-\\uFA0D\\uFA10" 1452 "\\uFA12\\uFA15-\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A-" 1453 "\\uFA2D\\uFA30-\\uFA6D\\uFA70-\\uFAD9\\uFB1D-\\uFB1F\\uFB2A-" 1454 "\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46" 1455 "-\\uFB4E\\uFE20-\\uFE26\\U000101FD\\U00010A0D\\U00010A0F\\U00010" 1456 "A38-\\U00010A3A\\U00010A3F\\U00011099\\U0001109B\\U000110A5" 1457 "\\U000110B9\\U000110BA\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001" 1458 "D172\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-" 1459 "\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0002" 1460 "F800-\\U0002FA1D]" 1461 , ""), errorCode); 1462 1463 skipSets[UNORM_NFKD].applyPattern(UnicodeString( 1464 "[^\\u00A0\\u00A8\\u00AA\\u00AF\\u00B2-\\u00B5\\u00B8-\\u00BA" 1465 "\\u00BC-\\u00BE\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6" 1466 "\\u00D9-\\u00DD\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6" 1467 "\\u00F9-\\u00FD\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130" 1468 "\\u0132-\\u0137\\u0139-\\u0140\\u0143-\\u0149\\u014C-\\u0151" 1469 "\\u0154-\\u0165\\u0168-\\u017F\\u01A0\\u01A1\\u01AF\\u01B0" 1470 "\\u01C4-\\u01DC\\u01DE-\\u01E3\\u01E6-\\u01F5\\u01F8-\\u021B" 1471 "\\u021E\\u021F\\u0226-\\u0233\\u02B0-\\u02B8\\u02D8-\\u02DD" 1472 "\\u02E0-\\u02E4\\u0300-\\u034E\\u0350-\\u036F\\u0374\\u037A" 1473 "\\u037E\\u0384-\\u038A\\u038C\\u038E-\\u0390\\u03AA-\\u03B0" 1474 "\\u03CA-\\u03CE\\u03D0-\\u03D6\\u03F0-\\u03F2\\u03F4\\u03F5" 1475 "\\u03F9\\u0400\\u0401\\u0403\\u0407\\u040C-\\u040E\\u0419\\u0439" 1476 "\\u0450\\u0451\\u0453\\u0457\\u045C-\\u045E\\u0476\\u0477\\u0483" 1477 "-\\u0487\\u04C1\\u04C2\\u04D0-\\u04D3\\u04D6\\u04D7\\u04DA-" 1478 "\\u04DF\\u04E2-\\u04E7\\u04EA-\\u04F5\\u04F8\\u04F9\\u0587" 1479 "\\u0591-\\u05BD\\u05BF\\u05C1\\u05C2\\u05C4\\u05C5\\u05C7\\u0610" 1480 "-\\u061A\\u0622-\\u0626\\u064B-\\u065E\\u0670\\u0675-\\u0678" 1481 "\\u06C0\\u06C2\\u06D3\\u06D6-\\u06DC\\u06DF-\\u06E4\\u06E7" 1482 "\\u06E8\\u06EA-\\u06ED\\u0711\\u0730-\\u074A\\u07EB-\\u07F3" 1483 "\\u0816-\\u0819\\u081B-\\u0823\\u0825-\\u0827\\u0829-\\u082D" 1484 "\\u0929\\u0931\\u0934\\u093C\\u094D\\u0951-\\u0954\\u0958-" 1485 "\\u095F\\u09BC\\u09CB-\\u09CD\\u09DC\\u09DD\\u09DF\\u0A33\\u0A36" 1486 "\\u0A3C\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C\\u0B48" 1487 "\\u0B4B-\\u0B4D\\u0B5C\\u0B5D\\u0B94\\u0BCA-\\u0BCD\\u0C48" 1488 "\\u0C4D\\u0C55\\u0C56\\u0CBC\\u0CC0\\u0CC7\\u0CC8\\u0CCA\\u0CCB" 1489 "\\u0CCD\\u0D4A-\\u0D4D\\u0DCA\\u0DDA\\u0DDC-\\u0DDE\\u0E33" 1490 "\\u0E38-\\u0E3A\\u0E48-\\u0E4B\\u0EB3\\u0EB8\\u0EB9\\u0EC8-" 1491 "\\u0ECB\\u0EDC\\u0EDD\\u0F0C\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39" 1492 "\\u0F43\\u0F4D\\u0F52\\u0F57\\u0F5C\\u0F69\\u0F71-\\u0F7D\\u0F80" 1493 "-\\u0F84\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9" 1494 "\\u0FC6\\u1026\\u1037\\u1039\\u103A\\u108D\\u10FC\\u135F\\u1714" 1495 "\\u1734\\u17D2\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1A60" 1496 "\\u1A75-\\u1A7C\\u1A7F\\u1B06\\u1B08\\u1B0A\\u1B0C\\u1B0E\\u1B12" 1497 "\\u1B34\\u1B3B\\u1B3D\\u1B40\\u1B41\\u1B43\\u1B44\\u1B6B-\\u1B73" 1498 "\\u1BAA\\u1C37\\u1CD0-\\u1CD2\\u1CD4-\\u1CE0\\u1CE2-\\u1CE8" 1499 "\\u1CED\\u1D2C-\\u1D2E\\u1D30-\\u1D3A\\u1D3C-\\u1D4D\\u1D4F-" 1500 "\\u1D6A\\u1D78\\u1D9B-\\u1DE6\\u1DFD-\\u1E9B\\u1EA0-\\u1EF9" 1501 "\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D" 1502 "\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F7D\\u1F80-" 1503 "\\u1FB4\\u1FB6-\\u1FC4\\u1FC6-\\u1FD3\\u1FD6-\\u1FDB\\u1FDD-" 1504 "\\u1FEF\\u1FF2-\\u1FF4\\u1FF6-\\u1FFE\\u2000-\\u200A\\u2011" 1505 "\\u2017\\u2024-\\u2026\\u202F\\u2033\\u2034\\u2036\\u2037\\u203C" 1506 "\\u203E\\u2047-\\u2049\\u2057\\u205F\\u2070\\u2071\\u2074-" 1507 "\\u208E\\u2090-\\u2094\\u20A8\\u20D0-\\u20DC\\u20E1\\u20E5-" 1508 "\\u20F0\\u2100-\\u2103\\u2105-\\u2107\\u2109-\\u2113\\u2115" 1509 "\\u2116\\u2119-\\u211D\\u2120-\\u2122\\u2124\\u2126\\u2128" 1510 "\\u212A-\\u212D\\u212F-\\u2131\\u2133-\\u2139\\u213B-\\u2140" 1511 "\\u2145-\\u2149\\u2150-\\u217F\\u2189\\u219A\\u219B\\u21AE" 1512 "\\u21CD-\\u21CF\\u2204\\u2209\\u220C\\u2224\\u2226\\u222C\\u222D" 1513 "\\u222F\\u2230\\u2241\\u2244\\u2247\\u2249\\u2260\\u2262\\u226D-" 1514 "\\u2271\\u2274\\u2275\\u2278\\u2279\\u2280\\u2281\\u2284\\u2285" 1515 "\\u2288\\u2289\\u22AC-\\u22AF\\u22E0-\\u22E3\\u22EA-\\u22ED" 1516 "\\u2329\\u232A\\u2460-\\u24EA\\u2A0C\\u2A74-\\u2A76\\u2ADC" 1517 "\\u2C7C\\u2C7D\\u2CEF-\\u2CF1\\u2D6F\\u2DE0-\\u2DFF\\u2E9F" 1518 "\\u2EF3\\u2F00-\\u2FD5\\u3000\\u302A-\\u302F\\u3036\\u3038-" 1519 "\\u303A\\u304C\\u304E\\u3050\\u3052\\u3054\\u3056\\u3058\\u305A" 1520 "\\u305C\\u305E\\u3060\\u3062\\u3065\\u3067\\u3069\\u3070\\u3071" 1521 "\\u3073\\u3074\\u3076\\u3077\\u3079\\u307A\\u307C\\u307D\\u3094" 1522 "\\u3099-\\u309C\\u309E\\u309F\\u30AC\\u30AE\\u30B0\\u30B2\\u30B4" 1523 "\\u30B6\\u30B8\\u30BA\\u30BC\\u30BE\\u30C0\\u30C2\\u30C5\\u30C7" 1524 "\\u30C9\\u30D0\\u30D1\\u30D3\\u30D4\\u30D6\\u30D7\\u30D9\\u30DA" 1525 "\\u30DC\\u30DD\\u30F4\\u30F7-\\u30FA\\u30FE\\u30FF\\u3131-" 1526 "\\u318E\\u3192-\\u319F\\u3200-\\u321E\\u3220-\\u3247\\u3250-" 1527 "\\u327E\\u3280-\\u32FE\\u3300-\\u33FF\\uA66F\\uA67C\\uA67D" 1528 "\\uA6F0\\uA6F1\\uA770\\uA806\\uA8C4\\uA8E0-\\uA8F1\\uA92B-" 1529 "\\uA92D\\uA953\\uA9B3\\uA9C0\\uAAB0\\uAAB2-\\uAAB4\\uAAB7\\uAAB8" 1530 "\\uAABE\\uAABF\\uAAC1\\uABED\\uAC00-\\uD7A3\\uF900-\\uFA0D" 1531 "\\uFA10\\uFA12\\uFA15-\\uFA1E\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A" 1532 "-\\uFA2D\\uFA30-\\uFA6D\\uFA70-\\uFAD9\\uFB00-\\uFB06\\uFB13-" 1533 "\\uFB17\\uFB1D-\\uFB36\\uFB38-\\uFB3C\\uFB3E\\uFB40\\uFB41" 1534 "\\uFB43\\uFB44\\uFB46-\\uFBB1\\uFBD3-\\uFD3D\\uFD50-\\uFD8F" 1535 "\\uFD92-\\uFDC7\\uFDF0-\\uFDFC\\uFE10-\\uFE19\\uFE20-\\uFE26" 1536 "\\uFE30-\\uFE44\\uFE47-\\uFE52\\uFE54-\\uFE66\\uFE68-\\uFE6B" 1537 "\\uFE70-\\uFE72\\uFE74\\uFE76-\\uFEFC\\uFF01-\\uFFBE\\uFFC2-" 1538 "\\uFFC7\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7\\uFFDA-\\uFFDC\\uFFE0-" 1539 "\\uFFE6\\uFFE8-\\uFFEE\\U000101FD\\U00010A0D\\U00010A0F\\U00010A" 1540 "38-\\U00010A3A\\U00010A3F\\U0001109A\\U0001109C\\U000110AB" 1541 "\\U000110B9\\U000110BA\\U0001D15E-\\U0001D169\\U0001D16D-\\U0001" 1542 "D172\\U0001D17B-\\U0001D182\\U0001D185-\\U0001D18B\\U0001D1AA-" 1543 "\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001D242-\\U0001D244\\U0001" 1544 "D400-\\U0001D454\\U0001D456-\\U0001D49C\\U0001D49E\\U0001D49F" 1545 "\\U0001D4A2\\U0001D4A5\\U0001D4A6\\U0001D4A9-\\U0001D4AC\\U0001D" 1546 "4AE-\\U0001D4B9\\U0001D4BB\\U0001D4BD-\\U0001D4C3\\U0001D4C5-" 1547 "\\U0001D505\\U0001D507-\\U0001D50A\\U0001D50D-\\U0001D514\\U0001" 1548 "D516-\\U0001D51C\\U0001D51E-\\U0001D539\\U0001D53B-\\U0001D53E" 1549 "\\U0001D540-\\U0001D544\\U0001D546\\U0001D54A-\\U0001D550\\U0001" 1550 "D552-\\U0001D6A5\\U0001D6A8-\\U0001D7CB\\U0001D7CE-\\U0001D7FF" 1551 "\\U0001F100-\\U0001F10A\\U0001F110-\\U0001F12E\\U0001F131\\U0001" 1552 "F13D\\U0001F13F\\U0001F142\\U0001F146\\U0001F14A-\\U0001F14E" 1553 "\\U0001F190\\U0001F200\\U0001F210-\\U0001F231\\U0001F240-\\U0001" 1554 "F248\\U0002F800-\\U0002FA1D]" 1555 , ""), errorCode); 1556 1557 skipSets[UNORM_NFKC].applyPattern(UnicodeString( 1558 "[^<->A-PR-Za-pr-z\\u00A0\\u00A8\\u00AA\\u00AF\\u00B2-\\u00B5" 1559 "\\u00B8-\\u00BA\\u00BC-\\u00BE\\u00C0-\\u00CF\\u00D1-\\u00D6" 1560 "\\u00D8-\\u00DD\\u00E0-\\u00EF\\u00F1-\\u00F6\\u00F8-\\u00FD" 1561 "\\u00FF-\\u0103\\u0106-\\u010F\\u0112-\\u0117\\u011A-\\u0121" 1562 "\\u0124\\u0125\\u0128-\\u012D\\u0130\\u0132\\u0133\\u0139\\u013A" 1563 "\\u013D-\\u0140\\u0143\\u0144\\u0147-\\u0149\\u014C-\\u0151" 1564 "\\u0154\\u0155\\u0158-\\u015D\\u0160\\u0161\\u0164\\u0165\\u0168" 1565 "-\\u0171\\u0174-\\u017F\\u01A0\\u01A1\\u01AF\\u01B0\\u01B7" 1566 "\\u01C4-\\u01DC\\u01DE-\\u01E1\\u01E6-\\u01EB\\u01F1-\\u01F5" 1567 "\\u01F8-\\u01FB\\u0200-\\u021B\\u021E\\u021F\\u0226-\\u0233" 1568 "\\u0292\\u02B0-\\u02B8\\u02D8-\\u02DD\\u02E0-\\u02E4\\u0300-" 1569 "\\u034E\\u0350-\\u036F\\u0374\\u037A\\u037E\\u0384\\u0385\\u0387" 1570 "\\u0391\\u0395\\u0397\\u0399\\u039F\\u03A1\\u03A5\\u03A9\\u03AC" 1571 "\\u03AE\\u03B1\\u03B5\\u03B7\\u03B9\\u03BF\\u03C1\\u03C5\\u03C9-" 1572 "\\u03CB\\u03CE\\u03D0-\\u03D6\\u03F0-\\u03F2\\u03F4\\u03F5" 1573 "\\u03F9\\u0406\\u0410\\u0413\\u0415-\\u0418\\u041A\\u041E\\u0423" 1574 "\\u0427\\u042B\\u042D\\u0430\\u0433\\u0435-\\u0438\\u043A\\u043E" 1575 "\\u0443\\u0447\\u044B\\u044D\\u0456\\u0474\\u0475\\u0483-\\u0487" 1576 "\\u04D8\\u04D9\\u04E8\\u04E9\\u0587\\u0591-\\u05BD\\u05BF\\u05C1" 1577 "\\u05C2\\u05C4\\u05C5\\u05C7\\u0610-\\u061A\\u0622\\u0623\\u0627" 1578 "\\u0648\\u064A-\\u065E\\u0670\\u0675-\\u0678\\u06C1\\u06D2" 1579 "\\u06D5-\\u06DC\\u06DF-\\u06E4\\u06E7\\u06E8\\u06EA-\\u06ED" 1580 "\\u0711\\u0730-\\u074A\\u07EB-\\u07F3\\u0816-\\u0819\\u081B-" 1581 "\\u0823\\u0825-\\u0827\\u0829-\\u082D\\u0928\\u0930\\u0933" 1582 "\\u093C\\u094D\\u0951-\\u0954\\u0958-\\u095F\\u09BC\\u09BE" 1583 "\\u09C7\\u09CD\\u09D7\\u09DC\\u09DD\\u09DF\\u0A33\\u0A36\\u0A3C" 1584 "\\u0A4D\\u0A59-\\u0A5B\\u0A5E\\u0ABC\\u0ACD\\u0B3C\\u0B3E\\u0B47" 1585 "\\u0B4D\\u0B56\\u0B57\\u0B5C\\u0B5D\\u0B92\\u0BBE\\u0BC6\\u0BC7" 1586 "\\u0BCD\\u0BD7\\u0C46\\u0C4D\\u0C55\\u0C56\\u0CBC\\u0CBF\\u0CC2" 1587 "\\u0CC6\\u0CCA\\u0CCD\\u0CD5\\u0CD6\\u0D3E\\u0D46\\u0D47\\u0D4D" 1588 "\\u0D57\\u0DCA\\u0DCF\\u0DD9\\u0DDC\\u0DDF\\u0E33\\u0E38-\\u0E3A" 1589 "\\u0E48-\\u0E4B\\u0EB3\\u0EB8\\u0EB9\\u0EC8-\\u0ECB\\u0EDC" 1590 "\\u0EDD\\u0F0C\\u0F18\\u0F19\\u0F35\\u0F37\\u0F39\\u0F43\\u0F4D" 1591 "\\u0F52\\u0F57\\u0F5C\\u0F69\\u0F71-\\u0F7D\\u0F80-\\u0F84" 1592 "\\u0F86\\u0F87\\u0F93\\u0F9D\\u0FA2\\u0FA7\\u0FAC\\u0FB9\\u0FC6" 1593 "\\u1025\\u102E\\u1037\\u1039\\u103A\\u108D\\u10FC\\u1100-\\u1112" 1594 "\\u1161-\\u1175\\u11A8-\\u11C2\\u135F\\u1714\\u1734\\u17D2" 1595 "\\u17DD\\u18A9\\u1939-\\u193B\\u1A17\\u1A18\\u1A60\\u1A75-" 1596 "\\u1A7C\\u1A7F\\u1B05\\u1B07\\u1B09\\u1B0B\\u1B0D\\u1B11\\u1B34" 1597 "\\u1B35\\u1B3A\\u1B3C\\u1B3E\\u1B3F\\u1B42\\u1B44\\u1B6B-\\u1B73" 1598 "\\u1BAA\\u1C37\\u1CD0-\\u1CD2\\u1CD4-\\u1CE0\\u1CE2-\\u1CE8" 1599 "\\u1CED\\u1D2C-\\u1D2E\\u1D30-\\u1D3A\\u1D3C-\\u1D4D\\u1D4F-" 1600 "\\u1D6A\\u1D78\\u1D9B-\\u1DE6\\u1DFD-\\u1E03\\u1E0A-\\u1E0F" 1601 "\\u1E12-\\u1E1B\\u1E20-\\u1E27\\u1E2A-\\u1E41\\u1E44-\\u1E53" 1602 "\\u1E58-\\u1E7D\\u1E80-\\u1E87\\u1E8E-\\u1E91\\u1E96-\\u1E9B" 1603 "\\u1EA0-\\u1EF3\\u1EF6-\\u1EF9\\u1F00-\\u1F11\\u1F18\\u1F19" 1604 "\\u1F20-\\u1F31\\u1F38\\u1F39\\u1F40\\u1F41\\u1F48\\u1F49\\u1F50" 1605 "\\u1F51\\u1F59\\u1F60-\\u1F71\\u1F73-\\u1F75\\u1F77\\u1F79" 1606 "\\u1F7B-\\u1F7D\\u1F80\\u1F81\\u1F88\\u1F89\\u1F90\\u1F91\\u1F98" 1607 "\\u1F99\\u1FA0\\u1FA1\\u1FA8\\u1FA9\\u1FB3\\u1FB6\\u1FBB-\\u1FC1" 1608 "\\u1FC3\\u1FC6\\u1FC9\\u1FCB-\\u1FCF\\u1FD3\\u1FDB\\u1FDD-" 1609 "\\u1FDF\\u1FE3\\u1FEB\\u1FED-\\u1FEF\\u1FF3\\u1FF6\\u1FF9\\u1FFB" 1610 "-\\u1FFE\\u2000-\\u200A\\u2011\\u2017\\u2024-\\u2026\\u202F" 1611 "\\u2033\\u2034\\u2036\\u2037\\u203C\\u203E\\u2047-\\u2049\\u2057" 1612 "\\u205F\\u2070\\u2071\\u2074-\\u208E\\u2090-\\u2094\\u20A8" 1613 "\\u20D0-\\u20DC\\u20E1\\u20E5-\\u20F0\\u2100-\\u2103\\u2105-" 1614 "\\u2107\\u2109-\\u2113\\u2115\\u2116\\u2119-\\u211D\\u2120-" 1615 "\\u2122\\u2124\\u2126\\u2128\\u212A-\\u212D\\u212F-\\u2131" 1616 "\\u2133-\\u2139\\u213B-\\u2140\\u2145-\\u2149\\u2150-\\u217F" 1617 "\\u2189\\u2190\\u2192\\u2194\\u21D0\\u21D2\\u21D4\\u2203\\u2208" 1618 "\\u220B\\u2223\\u2225\\u222C\\u222D\\u222F\\u2230\\u223C\\u2243" 1619 "\\u2245\\u2248\\u224D\\u2261\\u2264\\u2265\\u2272\\u2273\\u2276" 1620 "\\u2277\\u227A-\\u227D\\u2282\\u2283\\u2286\\u2287\\u2291\\u2292" 1621 "\\u22A2\\u22A8\\u22A9\\u22AB\\u22B2-\\u22B5\\u2329\\u232A\\u2460" 1622 "-\\u24EA\\u2A0C\\u2A74-\\u2A76\\u2ADC\\u2C7C\\u2C7D\\u2CEF-" 1623 "\\u2CF1\\u2D6F\\u2DE0-\\u2DFF\\u2E9F\\u2EF3\\u2F00-\\u2FD5" 1624 "\\u3000\\u302A-\\u302F\\u3036\\u3038-\\u303A\\u3046\\u304B" 1625 "\\u304D\\u304F\\u3051\\u3053\\u3055\\u3057\\u3059\\u305B\\u305D" 1626 "\\u305F\\u3061\\u3064\\u3066\\u3068\\u306F\\u3072\\u3075\\u3078" 1627 "\\u307B\\u3099-\\u309D\\u309F\\u30A6\\u30AB\\u30AD\\u30AF\\u30B1" 1628 "\\u30B3\\u30B5\\u30B7\\u30B9\\u30BB\\u30BD\\u30BF\\u30C1\\u30C4" 1629 "\\u30C6\\u30C8\\u30CF\\u30D2\\u30D5\\u30D8\\u30DB\\u30EF-\\u30F2" 1630 "\\u30FD\\u30FF\\u3131-\\u318E\\u3192-\\u319F\\u3200-\\u321E" 1631 "\\u3220-\\u3247\\u3250-\\u327E\\u3280-\\u32FE\\u3300-\\u33FF" 1632 "\\uA66F\\uA67C\\uA67D\\uA6F0\\uA6F1\\uA770\\uA806\\uA8C4\\uA8E0-" 1633 "\\uA8F1\\uA92B-\\uA92D\\uA953\\uA9B3\\uA9C0\\uAAB0\\uAAB2-" 1634 "\\uAAB4\\uAAB7\\uAAB8\\uAABE\\uAABF\\uAAC1\\uABED\\uAC00\\uAC1C" 1635 "\\uAC38\\uAC54\\uAC70\\uAC8C\\uACA8\\uACC4\\uACE0\\uACFC\\uAD18" 1636 "\\uAD34\\uAD50\\uAD6C\\uAD88\\uADA4\\uADC0\\uADDC\\uADF8\\uAE14" 1637 "\\uAE30\\uAE4C\\uAE68\\uAE84\\uAEA0\\uAEBC\\uAED8\\uAEF4\\uAF10" 1638 "\\uAF2C\\uAF48\\uAF64\\uAF80\\uAF9C\\uAFB8\\uAFD4\\uAFF0\\uB00C" 1639 "\\uB028\\uB044\\uB060\\uB07C\\uB098\\uB0B4\\uB0D0\\uB0EC\\uB108" 1640 "\\uB124\\uB140\\uB15C\\uB178\\uB194\\uB1B0\\uB1CC\\uB1E8\\uB204" 1641 "\\uB220\\uB23C\\uB258\\uB274\\uB290\\uB2AC\\uB2C8\\uB2E4\\uB300" 1642 "\\uB31C\\uB338\\uB354\\uB370\\uB38C\\uB3A8\\uB3C4\\uB3E0\\uB3FC" 1643 "\\uB418\\uB434\\uB450\\uB46C\\uB488\\uB4A4\\uB4C0\\uB4DC\\uB4F8" 1644 "\\uB514\\uB530\\uB54C\\uB568\\uB584\\uB5A0\\uB5BC\\uB5D8\\uB5F4" 1645 "\\uB610\\uB62C\\uB648\\uB664\\uB680\\uB69C\\uB6B8\\uB6D4\\uB6F0" 1646 "\\uB70C\\uB728\\uB744\\uB760\\uB77C\\uB798\\uB7B4\\uB7D0\\uB7EC" 1647 "\\uB808\\uB824\\uB840\\uB85C\\uB878\\uB894\\uB8B0\\uB8CC\\uB8E8" 1648 "\\uB904\\uB920\\uB93C\\uB958\\uB974\\uB990\\uB9AC\\uB9C8\\uB9E4" 1649 "\\uBA00\\uBA1C\\uBA38\\uBA54\\uBA70\\uBA8C\\uBAA8\\uBAC4\\uBAE0" 1650 "\\uBAFC\\uBB18\\uBB34\\uBB50\\uBB6C\\uBB88\\uBBA4\\uBBC0\\uBBDC" 1651 "\\uBBF8\\uBC14\\uBC30\\uBC4C\\uBC68\\uBC84\\uBCA0\\uBCBC\\uBCD8" 1652 "\\uBCF4\\uBD10\\uBD2C\\uBD48\\uBD64\\uBD80\\uBD9C\\uBDB8\\uBDD4" 1653 "\\uBDF0\\uBE0C\\uBE28\\uBE44\\uBE60\\uBE7C\\uBE98\\uBEB4\\uBED0" 1654 "\\uBEEC\\uBF08\\uBF24\\uBF40\\uBF5C\\uBF78\\uBF94\\uBFB0\\uBFCC" 1655 "\\uBFE8\\uC004\\uC020\\uC03C\\uC058\\uC074\\uC090\\uC0AC\\uC0C8" 1656 "\\uC0E4\\uC100\\uC11C\\uC138\\uC154\\uC170\\uC18C\\uC1A8\\uC1C4" 1657 "\\uC1E0\\uC1FC\\uC218\\uC234\\uC250\\uC26C\\uC288\\uC2A4\\uC2C0" 1658 "\\uC2DC\\uC2F8\\uC314\\uC330\\uC34C\\uC368\\uC384\\uC3A0\\uC3BC" 1659 "\\uC3D8\\uC3F4\\uC410\\uC42C\\uC448\\uC464\\uC480\\uC49C\\uC4B8" 1660 "\\uC4D4\\uC4F0\\uC50C\\uC528\\uC544\\uC560\\uC57C\\uC598\\uC5B4" 1661 "\\uC5D0\\uC5EC\\uC608\\uC624\\uC640\\uC65C\\uC678\\uC694\\uC6B0" 1662 "\\uC6CC\\uC6E8\\uC704\\uC720\\uC73C\\uC758\\uC774\\uC790\\uC7AC" 1663 "\\uC7C8\\uC7E4\\uC800\\uC81C\\uC838\\uC854\\uC870\\uC88C\\uC8A8" 1664 "\\uC8C4\\uC8E0\\uC8FC\\uC918\\uC934\\uC950\\uC96C\\uC988\\uC9A4" 1665 "\\uC9C0\\uC9DC\\uC9F8\\uCA14\\uCA30\\uCA4C\\uCA68\\uCA84\\uCAA0" 1666 "\\uCABC\\uCAD8\\uCAF4\\uCB10\\uCB2C\\uCB48\\uCB64\\uCB80\\uCB9C" 1667 "\\uCBB8\\uCBD4\\uCBF0\\uCC0C\\uCC28\\uCC44\\uCC60\\uCC7C\\uCC98" 1668 "\\uCCB4\\uCCD0\\uCCEC\\uCD08\\uCD24\\uCD40\\uCD5C\\uCD78\\uCD94" 1669 "\\uCDB0\\uCDCC\\uCDE8\\uCE04\\uCE20\\uCE3C\\uCE58\\uCE74\\uCE90" 1670 "\\uCEAC\\uCEC8\\uCEE4\\uCF00\\uCF1C\\uCF38\\uCF54\\uCF70\\uCF8C" 1671 "\\uCFA8\\uCFC4\\uCFE0\\uCFFC\\uD018\\uD034\\uD050\\uD06C\\uD088" 1672 "\\uD0A4\\uD0C0\\uD0DC\\uD0F8\\uD114\\uD130\\uD14C\\uD168\\uD184" 1673 "\\uD1A0\\uD1BC\\uD1D8\\uD1F4\\uD210\\uD22C\\uD248\\uD264\\uD280" 1674 "\\uD29C\\uD2B8\\uD2D4\\uD2F0\\uD30C\\uD328\\uD344\\uD360\\uD37C" 1675 "\\uD398\\uD3B4\\uD3D0\\uD3EC\\uD408\\uD424\\uD440\\uD45C\\uD478" 1676 "\\uD494\\uD4B0\\uD4CC\\uD4E8\\uD504\\uD520\\uD53C\\uD558\\uD574" 1677 "\\uD590\\uD5AC\\uD5C8\\uD5E4\\uD600\\uD61C\\uD638\\uD654\\uD670" 1678 "\\uD68C\\uD6A8\\uD6C4\\uD6E0\\uD6FC\\uD718\\uD734\\uD750\\uD76C" 1679 "\\uD788\\uF900-\\uFA0D\\uFA10\\uFA12\\uFA15-\\uFA1E\\uFA20" 1680 "\\uFA22\\uFA25\\uFA26\\uFA2A-\\uFA2D\\uFA30-\\uFA6D\\uFA70-" 1681 "\\uFAD9\\uFB00-\\uFB06\\uFB13-\\uFB17\\uFB1D-\\uFB36\\uFB38-" 1682 "\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46-\\uFBB1\\uFBD3" 1683 "-\\uFD3D\\uFD50-\\uFD8F\\uFD92-\\uFDC7\\uFDF0-\\uFDFC\\uFE10-" 1684 "\\uFE19\\uFE20-\\uFE26\\uFE30-\\uFE44\\uFE47-\\uFE52\\uFE54-" 1685 "\\uFE66\\uFE68-\\uFE6B\\uFE70-\\uFE72\\uFE74\\uFE76-\\uFEFC" 1686 "\\uFF01-\\uFFBE\\uFFC2-\\uFFC7\\uFFCA-\\uFFCF\\uFFD2-\\uFFD7" 1687 "\\uFFDA-\\uFFDC\\uFFE0-\\uFFE6\\uFFE8-\\uFFEE\\U000101FD\\U00010" 1688 "A0D\\U00010A0F\\U00010A38-\\U00010A3A\\U00010A3F\\U00011099" 1689 "\\U0001109B\\U000110A5\\U000110B9\\U000110BA\\U0001D15E-\\U0001D" 1690 "169\\U0001D16D-\\U0001D172\\U0001D17B-\\U0001D182\\U0001D185-" 1691 "\\U0001D18B\\U0001D1AA-\\U0001D1AD\\U0001D1BB-\\U0001D1C0\\U0001" 1692 "D242-\\U0001D244\\U0001D400-\\U0001D454\\U0001D456-\\U0001D49C" 1693 "\\U0001D49E\\U0001D49F\\U0001D4A2\\U0001D4A5\\U0001D4A6\\U0001D4" 1694 "A9-\\U0001D4AC\\U0001D4AE-\\U0001D4B9\\U0001D4BB\\U0001D4BD-" 1695 "\\U0001D4C3\\U0001D4C5-\\U0001D505\\U0001D507-\\U0001D50A\\U0001" 1696 "D50D-\\U0001D514\\U0001D516-\\U0001D51C\\U0001D51E-\\U0001D539" 1697 "\\U0001D53B-\\U0001D53E\\U0001D540-\\U0001D544\\U0001D546\\U0001" 1698 "D54A-\\U0001D550\\U0001D552-\\U0001D6A5\\U0001D6A8-\\U0001D7CB" 1699 "\\U0001D7CE-\\U0001D7FF\\U0001F100-\\U0001F10A\\U0001F110-" 1700 "\\U0001F12E\\U0001F131\\U0001F13D\\U0001F13F\\U0001F142\\U0001F1" 1701 "46\\U0001F14A-\\U0001F14E\\U0001F190\\U0001F200\\U0001F210-" 1702 "\\U0001F231\\U0001F240-\\U0001F248\\U0002F800-\\U0002FA1D]" 1703 , ""), errorCode); 1704} 1705 1706U_CDECL_BEGIN 1707 1708// USetAdder implementation 1709// Does not use uset.h to reduce code dependencies 1710static void U_CALLCONV 1711_set_add(USet *set, UChar32 c) { 1712 uset_add(set, c); 1713} 1714 1715static void U_CALLCONV 1716_set_addRange(USet *set, UChar32 start, UChar32 end) { 1717 uset_addRange(set, start, end); 1718} 1719 1720static void U_CALLCONV 1721_set_addString(USet *set, const UChar *str, int32_t length) { 1722 uset_addString(set, str, length); 1723} 1724 1725U_CDECL_END 1726 1727void 1728BasicNormalizerTest::TestSkippable() { 1729 UnicodeSet diff, skipSets[UNORM_MODE_COUNT], expectSets[UNORM_MODE_COUNT]; 1730 UnicodeString s, pattern; 1731 1732 /* build NF*Skippable sets from runtime data */ 1733 IcuTestErrorCode errorCode(*this, "TestSkippable"); 1734 skipSets[UNORM_NFD].applyPattern(UNICODE_STRING_SIMPLE("[:NFD_Inert:]"), errorCode); 1735 skipSets[UNORM_NFKD].applyPattern(UNICODE_STRING_SIMPLE("[:NFKD_Inert:]"), errorCode); 1736 skipSets[UNORM_NFC].applyPattern(UNICODE_STRING_SIMPLE("[:NFC_Inert:]"), errorCode); 1737 skipSets[UNORM_NFKC].applyPattern(UNICODE_STRING_SIMPLE("[:NFKC_Inert:]"), errorCode); 1738 if(errorCode.logDataIfFailureAndReset("UnicodeSet(NF..._Inert) failed")) { 1739 return; 1740 } 1741 1742 /* get expected sets from hardcoded patterns */ 1743 initExpectedSkippables(expectSets); 1744 1745 for(int32_t i=UNORM_NONE; i<UNORM_MODE_COUNT; ++i) { 1746 if(skipSets[i]!=expectSets[i]) { 1747 errln("error: TestSkippable skipSets[%d]!=expectedSets[%d]\n" 1748 "may need to update hardcoded UnicodeSet patterns in\n" 1749 "tstnorm.cpp/initExpectedSkippables(),\n" 1750 "see ICU4J - unicodetools.com.ibm.text.UCD.NFSkippable\n", 1751 i, i); 1752 1753 s=UNICODE_STRING_SIMPLE("skip-expect="); 1754 (diff=skipSets[i]).removeAll(expectSets[i]).toPattern(pattern, TRUE); 1755 s.append(pattern); 1756 1757 pattern.remove(); 1758 s.append(UNICODE_STRING_SIMPLE("\n\nexpect-skip=")); 1759 (diff=expectSets[i]).removeAll(skipSets[i]).toPattern(pattern, TRUE); 1760 s.append(pattern); 1761 s.append(UNICODE_STRING_SIMPLE("\n\n")); 1762 1763 errln(s); 1764 } 1765 } 1766} 1767 1768struct StringPair { const char *input, *expected; }; 1769 1770void 1771BasicNormalizerTest::TestCustomComp() { 1772 static const StringPair pairs[]={ 1773 { "\\uD801\\uE000\\uDFFE", "" }, 1774 { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD7FF\\uFFFF" }, 1775 { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD7FF\\U000107FE\\uFFFF" }, 1776 { "\\uE001\\U000110B9\\u0345\\u0308\\u0327", "\\uE002\\U000110B9\\u0327\\u0345" }, 1777 { "\\uE010\\U000F0011\\uE012", "\\uE011\\uE012" }, 1778 { "\\uE010\\U000F0011\\U000F0011\\uE012", "\\uE011\\U000F0010" }, 1779 { "\\uE111\\u1161\\uE112\\u1162", "\\uAE4C\\u1102\\u0062\\u1162" }, 1780 { "\\uFFF3\\uFFF7\\U00010036\\U00010077", "\\U00010037\\U00010037\\uFFF6\\U00010037" } 1781 }; 1782 IcuTestErrorCode errorCode(*this, "BasicNormalizerTest/TestCustomComp"); 1783 const Normalizer2 *customNorm2= 1784 Normalizer2::getInstance(loadTestData(errorCode), "testnorm", 1785 UNORM2_COMPOSE, errorCode); 1786 if(errorCode.logIfFailureAndReset("unable to load testdata/testnorm.nrm")) { 1787 return; 1788 } 1789 for(int32_t i=0; i<LENGTHOF(pairs); ++i) { 1790 const StringPair &pair=pairs[i]; 1791 UnicodeString input=UnicodeString(pair.input, -1, US_INV).unescape(); 1792 UnicodeString expected=UnicodeString(pair.expected, -1, US_INV).unescape(); 1793 UnicodeString result=customNorm2->normalize(input, errorCode); 1794 if(result!=expected) { 1795 errln("custom compose Normalizer2 did not normalize input %d as expected", i); 1796 } 1797 } 1798} 1799 1800void 1801BasicNormalizerTest::TestCustomFCC() { 1802 static const StringPair pairs[]={ 1803 { "\\uD801\\uE000\\uDFFE", "" }, 1804 { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD7FF\\uFFFF" }, 1805 { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD7FF\\U000107FE\\uFFFF" }, 1806 // The following expected result is different from CustomComp 1807 // because of only-contiguous composition. 1808 { "\\uE001\\U000110B9\\u0345\\u0308\\u0327", "\\uE001\\U000110B9\\u0327\\u0308\\u0345" }, 1809 { "\\uE010\\U000F0011\\uE012", "\\uE011\\uE012" }, 1810 { "\\uE010\\U000F0011\\U000F0011\\uE012", "\\uE011\\U000F0010" }, 1811 { "\\uE111\\u1161\\uE112\\u1162", "\\uAE4C\\u1102\\u0062\\u1162" }, 1812 { "\\uFFF3\\uFFF7\\U00010036\\U00010077", "\\U00010037\\U00010037\\uFFF6\\U00010037" } 1813 }; 1814 IcuTestErrorCode errorCode(*this, "BasicNormalizerTest/TestCustomFCC"); 1815 const Normalizer2 *customNorm2= 1816 Normalizer2::getInstance(loadTestData(errorCode), "testnorm", 1817 UNORM2_COMPOSE_CONTIGUOUS, errorCode); 1818 if(errorCode.logIfFailureAndReset("unable to load testdata/testnorm.nrm")) { 1819 return; 1820 } 1821 for(int32_t i=0; i<LENGTHOF(pairs); ++i) { 1822 const StringPair &pair=pairs[i]; 1823 UnicodeString input=UnicodeString(pair.input, -1, US_INV).unescape(); 1824 UnicodeString expected=UnicodeString(pair.expected, -1, US_INV).unescape(); 1825 UnicodeString result=customNorm2->normalize(input, errorCode); 1826 if(result!=expected) { 1827 errln("custom FCC Normalizer2 did not normalize input %d as expected", i); 1828 } 1829 } 1830} 1831 1832#endif /* #if !UCONFIG_NO_NORMALIZATION */ 1833