1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/******************************************************************** 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * COPYRIGHT: 383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius * Copyright (c) 1997-2011, International Business Machines Corporation and 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * others. All Rights Reserved. 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ********************************************************************/ 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_NORMALIZATION 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h" 1250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "unicode/errorcode.h" 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/normlzr.h" 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uniset.h" 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/usetiter.h" 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/schriter.h" 1783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/utf16.h" 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h" 1927f654740f2a26ad62a5c155af9199af9e69b889claireho#include "normalizer2impl.h" 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "tstnorm.h" 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0]))) 
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define ARRAY_LENGTH(array) LENGTHOF(array) 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define CASE(id,test) case id: \ 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name = #test; \ 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (exec) { \ 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln(#test "---"); \ 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)""); \ 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru test(); \ 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } \ 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UErrorCode status = U_ZERO_ERROR; 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid BasicNormalizerTest::runIndexedTest(int32_t index, UBool exec, 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* &name, char* /*par*/) { 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (index) { 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(0,TestDecomp); 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(1,TestCompatDecomp); 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(2,TestCanonCompose); 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(3,TestCompatCompose); 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(4,TestPrevious); 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(5,TestHangulDecomp); 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(6,TestHangulCompose); 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
CASE(7,TestTibetan); 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(8,TestCompositionExclusion); 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(9,TestZeroIndex); 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(10,TestVerisign); 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(11,TestPreviousNext); 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(12,TestNormalizerAPI); 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(13,TestConcatenate); 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(14,FindFoldFCDExceptions); 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(15,TestCompare); 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CASE(16,TestSkippable); 5627f654740f2a26ad62a5c155af9199af9e69b889claireho#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION 5750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho CASE(17,TestCustomComp); 5850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho CASE(18,TestCustomFCC); 5927f654740f2a26ad62a5c155af9199af9e69b889claireho#endif 6027f654740f2a26ad62a5c155af9199af9e69b889claireho CASE(19,TestFilteredNormalizer2Coverage); 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: name = ""; break; 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Convert Java-style strings with \u Unicode escapes into UnicodeString objects 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UnicodeString str(const char *input) 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru UnicodeString str(input, ""); // Invariant conversion 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return str.unescape(); 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruBasicNormalizerTest::BasicNormalizerTest() 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // canonTest 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Input Decomposed Composed 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonTests[0][0] = str("cat"); canonTests[0][1] = str("cat"); canonTests[0][2] = str("cat"); 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonTests[1][0] = str("\\u00e0ardvark"); canonTests[1][1] = str("a\\u0300ardvark"); canonTests[1][2] = str("\\u00e0ardvark"); 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonTests[2][0] = str("\\u1e0a"); canonTests[2][1] = str("D\\u0307"); canonTests[2][2] = str("\\u1e0a"); // D-dot_above 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonTests[3][0] = str("D\\u0307"); canonTests[3][1] = str("D\\u0307"); canonTests[3][2] = str("\\u1e0a"); // D dot_above 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonTests[4][0] = str("\\u1e0c\\u0307"); canonTests[4][1] = str("D\\u0323\\u0307"); canonTests[4][2] = str("\\u1e0c\\u0307"); // D-dot_below dot_above 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonTests[5][0] = str("\\u1e0a\\u0323"); canonTests[5][1] = str("D\\u0323\\u0307"); canonTests[5][2] = str("\\u1e0c\\u0307"); // D-dot_above dot_below 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonTests[6][0] = str("D\\u0307\\u0323"); canonTests[6][1] = str("D\\u0323\\u0307"); canonTests[6][2] = str("\\u1e0c\\u0307"); // D dot_below dot_above 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonTests[7][0] = str("\\u1e10\\u0307\\u0323"); canonTests[7][1] = str("D\\u0327\\u0323\\u0307"); canonTests[7][2] = str("\\u1e10\\u0323\\u0307"); // D dot_below cedilla dot_above 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonTests[8][0] = str("D\\u0307\\u0328\\u0323"); canonTests[8][1] = str("D\\u0328\\u0323\\u0307"); canonTests[8][2] = str("\\u1e0c\\u0328\\u0307"); // D dot_above ogonek dot_below 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonTests[9][0] = str("\\u1E14"); canonTests[9][1] = str("E\\u0304\\u0300"); canonTests[9][2] = str("\\u1E14"); // E-macron-grave 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonTests[10][0] = str("\\u0112\\u0300"); canonTests[10][1] = str("E\\u0304\\u0300"); canonTests[10][2] = str("\\u1E14"); // E-macron + grave 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonTests[11][0] = str("\\u00c8\\u0304"); canonTests[11][1] = str("E\\u0300\\u0304"); canonTests[11][2] = str("\\u00c8\\u0304"); // E-grave + macron 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonTests[12][0] = str("\\u212b"); canonTests[12][1] = str("A\\u030a"); canonTests[12][2] = str("\\u00c5"); // angstrom_sign 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonTests[13][0] = str("\\u00c5"); canonTests[13][1] = str("A\\u030a"); canonTests[13][2] = str("\\u00c5"); // A-ring 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonTests[14][0] = str("\\u00C4ffin"); canonTests[14][1] = str("A\\u0308ffin"); canonTests[14][2] = str("\\u00C4ffin"); 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonTests[15][0] = str("\\u00C4\\uFB03n"); canonTests[15][1] = str("A\\u0308\\uFB03n"); canonTests[15][2] = str("\\u00C4\\uFB03n"); 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonTests[16][0] = str("Henry IV"); canonTests[16][1] = str("Henry IV"); canonTests[16][2] = str("Henry IV"); 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonTests[17][0] = str("Henry \\u2163"); canonTests[17][1] = str("Henry \\u2163"); canonTests[17][2] = str("Henry \\u2163"); 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonTests[18][0] = str("\\u30AC"); canonTests[18][1] = str("\\u30AB\\u3099"); canonTests[18][2] = str("\\u30AC"); // ga (Katakana) 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonTests[19][0] = str("\\u30AB\\u3099"); canonTests[19][1] = str("\\u30AB\\u3099"); canonTests[19][2] = str("\\u30AC"); // ka + ten 
119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonTests[20][0] = str("\\uFF76\\uFF9E"); canonTests[20][1] = str("\\uFF76\\uFF9E"); canonTests[20][2] = str("\\uFF76\\uFF9E"); // hw_ka + hw_ten 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonTests[21][0] = str("\\u30AB\\uFF9E"); canonTests[21][1] = str("\\u30AB\\uFF9E"); canonTests[21][2] = str("\\u30AB\\uFF9E"); // ka + hw_ten 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonTests[22][0] = str("\\uFF76\\u3099"); canonTests[22][1] = str("\\uFF76\\u3099"); canonTests[22][2] = str("\\uFF76\\u3099"); // hw_ka + ten 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonTests[23][0] = str("A\\u0300\\u0316"); canonTests[23][1] = str("A\\u0316\\u0300"); canonTests[23][2] = str("\\u00C0\\u0316"); 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* compatTest */ 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Input Decomposed Composed 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru compatTests[0][0] = str("cat"); compatTests[0][1] = str("cat"); compatTests[0][2] = str("cat") ; 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru compatTests[1][0] = str("\\uFB4f"); compatTests[1][1] = str("\\u05D0\\u05DC"); compatTests[1][2] = str("\\u05D0\\u05DC"); // Alef-Lamed vs. 
Alef, Lamed 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru compatTests[2][0] = str("\\u00C4ffin"); compatTests[2][1] = str("A\\u0308ffin"); compatTests[2][2] = str("\\u00C4ffin") ; 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru compatTests[3][0] = str("\\u00C4\\uFB03n"); compatTests[3][1] = str("A\\u0308ffin"); compatTests[3][2] = str("\\u00C4ffin") ; // ffi ligature -> f + f + i 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru compatTests[4][0] = str("Henry IV"); compatTests[4][1] = str("Henry IV"); compatTests[4][2] = str("Henry IV") ; 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru compatTests[5][0] = str("Henry \\u2163"); compatTests[5][1] = str("Henry IV"); compatTests[5][2] = str("Henry IV") ; 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru compatTests[6][0] = str("\\u30AC"); compatTests[6][1] = str("\\u30AB\\u3099"); compatTests[6][2] = str("\\u30AC") ; // ga (Katakana) 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru compatTests[7][0] = str("\\u30AB\\u3099"); compatTests[7][1] = str("\\u30AB\\u3099"); compatTests[7][2] = str("\\u30AC") ; // ka + ten 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru compatTests[8][0] = str("\\uFF76\\u3099"); compatTests[8][1] = str("\\u30AB\\u3099"); compatTests[8][2] = str("\\u30AC") ; // hw_ka + ten 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and 
later */ 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru compatTests[9][0] = str("\\uFF76\\uFF9E"); compatTests[9][1] = str("\\u30AB\\u3099"); compatTests[9][2] = str("\\u30AC") ; // hw_ka + hw_ten 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru compatTests[10][0] = str("\\u30AB\\uFF9E"); compatTests[10][1] = str("\\u30AB\\u3099"); compatTests[10][2] = str("\\u30AC") ; // ka + hw_ten 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Hangul Canonical */ 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Input Decomposed Composed 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru hangulCanon[0][0] = str("\\ud4db"); hangulCanon[0][1] = str("\\u1111\\u1171\\u11b6"); hangulCanon[0][2] = str("\\ud4db") ; 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru hangulCanon[1][0] = str("\\u1111\\u1171\\u11b6"), hangulCanon[1][1] = str("\\u1111\\u1171\\u11b6"), hangulCanon[1][2] = str("\\ud4db"); 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruBasicNormalizerTest::~BasicNormalizerTest() 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid BasicNormalizerTest::TestPrevious() 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer* norm = new Normalizer("", UNORM_NFD); 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
logln("testing decomp..."); 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t i; 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = 0; i < ARRAY_LENGTH(canonTests); i++) { 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backAndForth(norm, canonTests[i][0]); 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("testing compose..."); 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru norm->setMode(UNORM_NFC); 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = 0; i < ARRAY_LENGTH(canonTests); i++) { 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backAndForth(norm, canonTests[i][0]); 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete norm; 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid BasicNormalizerTest::TestDecomp() 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer* norm = new Normalizer("", UNORM_NFD); 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iterateTest(norm, canonTests, ARRAY_LENGTH(canonTests), 1); 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru staticTest(UNORM_NFD, 0, canonTests, ARRAY_LENGTH(canonTests), 1); 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete norm; 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 
BasicNormalizerTest::TestCompatDecomp() 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer* norm = new Normalizer("", UNORM_NFKD); 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iterateTest(norm, compatTests, ARRAY_LENGTH(compatTests), 1); 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru staticTest(UNORM_NFKD, 0, 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru compatTests, ARRAY_LENGTH(compatTests), 1); 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete norm; 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid BasicNormalizerTest::TestCanonCompose() 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer* norm = new Normalizer("", UNORM_NFC); 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iterateTest(norm, canonTests, ARRAY_LENGTH(canonTests), 2); 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru staticTest(UNORM_NFC, 0, canonTests, 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ARRAY_LENGTH(canonTests), 2); 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete norm; 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid BasicNormalizerTest::TestCompatCompose() 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer* norm = new Normalizer("", UNORM_NFKC); 
214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iterateTest(norm, compatTests, ARRAY_LENGTH(compatTests), 2); 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru staticTest(UNORM_NFKC, 0, 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru compatTests, ARRAY_LENGTH(compatTests), 2); 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete norm; 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------------- 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid BasicNormalizerTest::TestHangulCompose() 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Make sure that the static composition methods work 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("Canonical composition..."); 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru staticTest(UNORM_NFC, 0, hangulCanon, ARRAY_LENGTH(hangulCanon), 2); 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("Compatibility composition..."); 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Now try iterative composition.... 
232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("Static composition..."); 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer* norm = new Normalizer("", UNORM_NFC); 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iterateTest(norm, hangulCanon, ARRAY_LENGTH(hangulCanon), 2); 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru norm->setMode(UNORM_NFKC); 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // And finally, make sure you can do it in reverse too 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("Reverse iteration..."); 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru norm->setMode(UNORM_NFC); 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (uint32_t i = 0; i < ARRAY_LENGTH(hangulCanon); i++) { 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backAndForth(norm, hangulCanon[i][0]); 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete norm; 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid BasicNormalizerTest::TestHangulDecomp() 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Make sure that the static decomposition methods work 249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("Canonical decomposition..."); 250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru staticTest(UNORM_NFD, 0, hangulCanon, ARRAY_LENGTH(hangulCanon), 1); 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("Compatibility decomposition..."); 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Now the iterative decomposition methods... 254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("Iterative decomposition..."); 255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer* norm = new Normalizer("", UNORM_NFD); 256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iterateTest(norm, hangulCanon, ARRAY_LENGTH(hangulCanon), 1); 257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru norm->setMode(UNORM_NFKD); 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // And finally, make sure you can do it in reverse too 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("Reverse iteration..."); 261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru norm->setMode(UNORM_NFD); 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (uint32_t i = 0; i < ARRAY_LENGTH(hangulCanon); i++) { 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru backAndForth(norm, hangulCanon[i][0]); 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete norm; 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The Tibetan vowel sign AA, 0f71, was messed up prior to Unicode version 2.1.9. 
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid BasicNormalizerTest::TestTibetan(void) { 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString decomp[1][3]; 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru decomp[0][0] = str("\\u0f77"); 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru decomp[0][1] = str("\\u0f77"); 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru decomp[0][2] = str("\\u0fb2\\u0f71\\u0f80"); 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString compose[1][3]; 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru compose[0][0] = str("\\u0fb2\\u0f71\\u0f80"); 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru compose[0][1] = str("\\u0fb2\\u0f71\\u0f80"); 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru compose[0][2] = str("\\u0fb2\\u0f71\\u0f80"); 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru staticTest(UNORM_NFD, 0, decomp, ARRAY_LENGTH(decomp), 1); 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru staticTest(UNORM_NFKD, 0, decomp, ARRAY_LENGTH(decomp), 2); 284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru staticTest(UNORM_NFC, 0, compose, ARRAY_LENGTH(compose), 1); 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru staticTest(UNORM_NFKC, 0, compose, ARRAY_LENGTH(compose), 2); 286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Make sure characters in the CompositionExclusion.txt list do not get 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 
composed to. 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid BasicNormalizerTest::TestCompositionExclusion(void) { 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This list is generated from CompositionExclusion.txt. 294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Update whenever the normalizer tables are updated. Note 295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // that we test all characters listed, even those that can be 296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // derived from the Unicode DB and are therefore commented 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // out. 298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // ### TODO read composition exclusion from source/data/unidata file 299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and test against that 300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString EXCLUDED = str( 301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0340\\u0341\\u0343\\u0344\\u0374\\u037E\\u0387\\u0958" 302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0959\\u095A\\u095B\\u095C\\u095D\\u095E\\u095F\\u09DC" 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u09DD\\u09DF\\u0A33\\u0A36\\u0A59\\u0A5A\\u0A5B\\u0A5E" 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0B5C\\u0B5D\\u0F43\\u0F4D\\u0F52\\u0F57\\u0F5C\\u0F69" 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0F73\\u0F75\\u0F76\\u0F78\\u0F81\\u0F93\\u0F9D\\u0FA2" 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0FA7\\u0FAC\\u0FB9\\u1F71\\u1F73\\u1F75\\u1F77\\u1F79" 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u1F7B\\u1F7D\\u1FBB\\u1FBE\\u1FC9\\u1FCB\\u1FD3\\u1FDB" 308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
"\\u1FE3\\u1FEB\\u1FEE\\u1FEF\\u1FF9\\u1FFB\\u1FFD\\u2000" 309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u2001\\u2126\\u212A\\u212B\\u2329\\u232A\\uF900\\uFA10" 310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\uFA12\\uFA15\\uFA20\\uFA22\\uFA25\\uFA26\\uFA2A\\uFB1F" 311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\uFB2A\\uFB2B\\uFB2C\\uFB2D\\uFB2E\\uFB2F\\uFB30\\uFB31" 312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\uFB32\\uFB33\\uFB34\\uFB35\\uFB36\\uFB38\\uFB39\\uFB3A" 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\uFB3B\\uFB3C\\uFB3E\\uFB40\\uFB41\\uFB43\\uFB44\\uFB46" 314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\uFB47\\uFB48\\uFB49\\uFB4A\\uFB4B\\uFB4C\\uFB4D\\uFB4E" 315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ); 316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<EXCLUDED.length(); ++i) { 317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString a(EXCLUDED.charAt(i)); 318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString b; 319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString c; 320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer::normalize(a, UNORM_NFKD, 0, b, status); 321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer::normalize(b, UNORM_NFC, 0, c, status); 322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == a) { 323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("FAIL: " + hex(a) + " x DECOMP_COMPAT => " + 324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru hex(b) + " x COMPOSE => " + 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru hex(c)); 326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (verbose) { 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("Ok: " + hex(a) + " x DECOMP_COMPAT 
=> " + 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru hex(b) + " x COMPOSE => " + 329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru hex(c)); 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test for a problem that showed up just before ICU 1.6 release 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * having to do with combining characters with an index of zero. 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Such characters do not participate in any canonical 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * decompositions. However, having an index of zero means that 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * they all share one typeMask[] entry, that is, they all have to 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * map to the same canonical class, which is not the case, in 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * reality. 
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid BasicNormalizerTest::TestZeroIndex(void) { 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* DATA[] = { 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Expect col1 x COMPOSE_COMPAT => col2 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Expect col2 x DECOMP => col3 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "A\\u0316\\u0300", "\\u00C0\\u0316", "A\\u0316\\u0300", 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "A\\u0300\\u0316", "\\u00C0\\u0316", "A\\u0316\\u0300", 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "A\\u0327\\u0300", "\\u00C0\\u0327", "A\\u0327\\u0300", 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "c\\u0321\\u0327", "c\\u0321\\u0327", "c\\u0321\\u0327", 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "c\\u0327\\u0321", "\\u00E7\\u0321", "c\\u0327\\u0321", 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0])); 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<DATA_length; i+=3) { 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString a(DATA[i], ""); 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru a = a.unescape(); 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString b; 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer::normalize(a, UNORM_NFKC, 0, b, status); 36150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 
36250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("Error calling normalize UNORM_NFKC: %s", u_errorName(status)); 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 36450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString exp(DATA[i+1], ""); 36550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho exp = exp.unescape(); 36650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (b == exp) { 36750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho logln((UnicodeString)"Ok: " + hex(a) + " x COMPOSE_COMPAT => " + hex(b)); 36850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 36950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln((UnicodeString)"FAIL: " + hex(a) + " x COMPOSE_COMPAT => " + hex(b) + 37050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ", expect " + hex(exp)); 37150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer::normalize(b, UNORM_NFD, 0, a, status); 37450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 37550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("Error calling normalize UNORM_NFD: %s", u_errorName(status)); 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 37750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString exp = UnicodeString(DATA[i+2], "").unescape(); 37850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (a == exp) { 37950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho logln((UnicodeString)"Ok: " + hex(b) + " x DECOMP => " + hex(a)); 38050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 38150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln((UnicodeString)"FAIL: " + hex(b) + " x DECOMP => " + hex(a) + 38250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho ", expect " + hex(exp)); 38350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
} 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Run a few specific cases that are failing for Verisign. 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid BasicNormalizerTest::TestVerisign(void) { 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru > Their input: 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru > 05B8 05B9 05B1 0591 05C3 05B0 05AC 059F 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru > Their output (supposedly from ICU): 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru > 05B8 05B1 05B9 0591 05C3 05B0 05AC 059F 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru > My output from charlint: 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru > 05B1 05B8 05B9 0591 05C3 05B0 05AC 059F 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 05B8 05B9 05B1 0591 05C3 05B0 05AC 059F => 05B1 05B8 05B9 0591 05C3 05B0 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 05AC 059F 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05B8 18 E HEBREW POINT QAMATS 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05B9 19 F HEBREW POINT HOLAM 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05B1 11 HEBREW POINT HATAF SEGOL 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+0591 220 HEBREW ACCENT ETNAHTA 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05C3 0 
HEBREW PUNCTUATION SOF PASUQ 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05B0 10 HEBREW POINT SHEVA 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05AC 230 HEBREW ACCENT ILUY 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+059F 230 HEBREW ACCENT QARNEY PARA 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05B1 11 HEBREW POINT HATAF SEGOL 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05B8 18 HEBREW POINT QAMATS 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05B9 19 HEBREW POINT HOLAM 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+0591 220 HEBREW ACCENT ETNAHTA 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05C3 0 HEBREW PUNCTUATION SOF PASUQ 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05B0 10 HEBREW POINT SHEVA 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05AC 230 HEBREW ACCENT ILUY 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+059F 230 HEBREW ACCENT QARNEY PARA 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Wrong result: 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05B8 18 HEBREW POINT QAMATS 423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05B1 11 HEBREW POINT HATAF SEGOL 424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05B9 19 HEBREW POINT HOLAM 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+0591 220 HEBREW ACCENT ETNAHTA 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05C3 0 HEBREW PUNCTUATION SOF PASUQ 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05B0 10 HEBREW POINT SHEVA 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05AC 230 HEBREW ACCENT ILUY 
429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+059F 230 HEBREW ACCENT QARNEY PARA 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru > Their input: 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru >0592 05B7 05BC 05A5 05B0 05C0 05C4 05AD 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru >Their output (supposedly from ICU): 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru >0592 05B0 05B7 05BC 05A5 05C0 05AD 05C4 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru >My output from charlint: 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru >05B0 05B7 05BC 05A5 0592 05C0 05AD 05C4 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0592 05B7 05BC 05A5 05B0 05C0 05C4 05AD => 05B0 05B7 05BC 05A5 0592 05C0 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 05AD 05C4 441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+0592 230 HEBREW ACCENT SEGOL 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05B7 17 HEBREW POINT PATAH 444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05BC 21 HEBREW POINT DAGESH OR MAPIQ 445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05A5 220 HEBREW ACCENT MERKHA 446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05B0 10 HEBREW POINT SHEVA 447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05C0 0 HEBREW PUNCTUATION PASEQ 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05C4 230 HEBREW MARK UPPER DOT 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05AD 222 HEBREW ACCENT DEHI 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05B0 10 HEBREW POINT SHEVA 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05B7 17 HEBREW POINT PATAH 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05BC 21 HEBREW POINT DAGESH OR MAPIQ 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05A5 220 HEBREW ACCENT MERKHA 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+0592 230 HEBREW ACCENT SEGOL 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05C0 0 HEBREW PUNCTUATION PASEQ 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05AD 222 HEBREW ACCENT DEHI 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05C4 230 HEBREW MARK UPPER DOT 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Wrong result: 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+0592 230 HEBREW ACCENT SEGOL 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05B0 10 HEBREW POINT SHEVA 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05B7 17 HEBREW POINT PATAH 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05BC 21 HEBREW POINT DAGESH OR MAPIQ 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05A5 220 HEBREW ACCENT MERKHA 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05C0 0 HEBREW PUNCTUATION PASEQ 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05AD 222 HEBREW ACCENT DEHI 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U+05C4 230 HEBREW MARK UPPER DOT 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString data[2][3]; 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data[0][0] = str("\\u05B8\\u05B9\\u05B1\\u0591\\u05C3\\u05B0\\u05AC\\u059F"); 
472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data[0][1] = str("\\u05B1\\u05B8\\u05B9\\u0591\\u05C3\\u05B0\\u05AC\\u059F"); 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data[0][2] = str(""); 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data[1][0] = str("\\u0592\\u05B7\\u05BC\\u05A5\\u05B0\\u05C0\\u05C4\\u05AD"); 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data[1][1] = str("\\u05B0\\u05B7\\u05BC\\u05A5\\u0592\\u05C0\\u05AD\\u05C4"); 476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data[1][2] = str(""); 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru staticTest(UNORM_NFD, 0, data, ARRAY_LENGTH(data), 1); 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru staticTest(UNORM_NFC, 0, data, ARRAY_LENGTH(data), 1); 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//------------------------------------------------------------------------ 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Internal utilities 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString BasicNormalizerTest::hex(UChar ch) { 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString result; 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return appendHex(ch, 4, result); 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString BasicNormalizerTest::hex(const UnicodeString& s) { 492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString 
result; 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int i = 0; i < s.length(); ++i) { 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i != 0) result += (UChar)0x2c/*,*/; 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru appendHex(s[i], 4, result); 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruinline static void insert(UnicodeString& dest, int pos, UChar32 ch) 502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru dest.replace(pos, 0, ch); 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid BasicNormalizerTest::backAndForth(Normalizer* iter, const UnicodeString& input) 507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 ch; 509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter->setText(input, status); 510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Run through the iterator forwards and stick it into a StringBuffer 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString forward; 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (ch = iter->first(); ch != iter->DONE; ch = iter->next()) { 514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru forward += ch; 515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Now do it backwards 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString reverse; 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (ch = iter->last(); ch != iter->DONE; ch = iter->previous()) { 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru insert(reverse, 0, ch); 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (forward != reverse) { 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Forward/reverse mismatch for input " + hex(input) 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru + ", forward: " + hex(forward) + ", backward: " + hex(reverse)); 526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid BasicNormalizerTest::staticTest(UNormalizationMode mode, int options, 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString tests[][3], int length, 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int outCol) 532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int i = 0; i < length; i++) 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& input = tests[i][0]; 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& expect = tests[i][outCol]; 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("Normalizing '" 
+ input + "' (" + hex(input) + ")" ); 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString output; 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer::normalize(input, mode, options, output, status); 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (output != expect) { 54450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln(UnicodeString("ERROR: case ") + i + " normalized " + hex(input) + "\n" 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru + " expected " + hex(expect) + "\n" 546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru + " static got " + hex(output) ); 547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid BasicNormalizerTest::iterateTest(Normalizer* iter, 552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString tests[][3], int length, 553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int outCol) 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int i = 0; i < length; i++) 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& input = tests[i][0]; 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& expect = tests[i][outCol]; 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("Normalizing '" + input + "' (" + hex(input) + ")" ); 
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter->setText(input, status); 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru assertEqual(input, expect, iter, UnicodeString("ERROR: case ") + i + " "); 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid BasicNormalizerTest::assertEqual(const UnicodeString& input, 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& expected, 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer* iter, 570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& errPrefix) 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString result; 573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (UChar32 ch = iter->first(); ch != iter->DONE; ch = iter->next()) { 575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result += ch; 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (result != expected) { 57850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln(errPrefix + "normalized " + hex(input) + "\n" 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru + " expected " + hex(expected) + "\n" 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru + " iterate got " + hex(result) ); 581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// helper class for TestPreviousNext() 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// simple UTF-32 character iterator 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass UChar32Iterator { 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic: 588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32Iterator(const UChar32 *text, int32_t len, int32_t index) : 589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s(text), length(len), i(index) {} 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 current() { 592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(i<length) { 593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return s[i]; 594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0xffff; 596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 next() { 600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(i<length) { 601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return s[i++]; 602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0xffff; 604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 previous() { 608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(i>0) { 
609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return s[--i]; 610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0xffff; 612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t getIndex() { 616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return i; 617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate: 619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar32 *s; 620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length, i; 621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruBasicNormalizerTest::TestPreviousNext(const UChar *src, int32_t srcLength, 625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar32 *expect, int32_t expectLength, 626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const int32_t *expectIndex, // its length=expectLength+1 627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t srcMiddle, int32_t expectMiddle, 628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *moves, 629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UNormalizationMode mode, 630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *name) { 631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // iterators 632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer iter(src, srcLength, mode); 
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test getStaticClassID and getDynamicClassID 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(iter.getDynamicClassID() != Normalizer::getStaticClassID()) { 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("getStaticClassID != getDynamicClassID for Normalizer."); 637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32Iterator iter32(expect, expectLength, expectMiddle); 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c1, c2; 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char m; 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // initially set the indexes into the middle of the strings 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter.setIndexOnly(srcMiddle); 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // move around and compare the iteration code points with 648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the expected ones 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *move=moves; 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while((m=*move++)!=0) { 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(m=='-') { 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c1=iter.previous(); 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c2=iter32.previous(); 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(m=='0') { 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru c1=iter.current(); 656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c2=iter32.current(); 657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* m=='+' */ { 658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c1=iter.next(); 659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c2=iter32.next(); 660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // compare results 663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c1!=c2) { 664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // copy the moves until the current (m) move, and terminate 665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char history[64]; 666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_strcpy(history, moves); 667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru history[move-moves]=0; 66850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("error: mismatch in Normalizer iteration (%s) at %s: " 66950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "got c1=U+%04lx != expected c2=U+%04lx", 670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name, history, c1, c2); 671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // compare indexes 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(iter.getIndex()!=expectIndex[iter32.getIndex()]) { 676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // copy the moves until the current (m) move, and terminate 677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char history[64]; 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
uprv_strcpy(history, moves); 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru history[move-moves]=0; 680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("error: index mismatch in Normalizer iteration (%s) at %s: " 681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "Normalizer index %ld expected %ld\n", 682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name, history, iter.getIndex(), expectIndex[iter32.getIndex()]); 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruBasicNormalizerTest::TestPreviousNext() { 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // src and expect strings 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const UChar src[]={ 69283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius U16_LEAD(0x2f999), U16_TRAIL(0x2f999), 69383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius U16_LEAD(0x1d15f), U16_TRAIL(0x1d15f), 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0xc4, 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x1ed0 696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const UChar32 expect[]={ 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x831d, 699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x1d158, 0x1d165, 700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x41, 0x308, 701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x4f, 0x302, 0x301 
702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // expected src indexes corresponding to expect indexes 705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const int32_t expectIndex[]={ 706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0, 707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2, 2, 708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4, 4, 709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5, 5, 5, 710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 6 // behind last character 711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // src and expect strings for regression test for j2911 714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const UChar src_j2911[]={ 71583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius U16_LEAD(0x2f999), U16_TRAIL(0x2f999), 716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0xdd00, 0xd900, // unpaired surrogates - regression test for j2911 717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0xc4, 718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x4f, 0x302, 0x301 719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const UChar32 expect_j2911[]={ 721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x831d, 722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0xdd00, 0xd900, // unpaired surrogates - regression test for j2911 723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0xc4, 724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x1ed0 
725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // expected src indexes corresponding to expect indexes 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const int32_t expectIndex_j2911[]={ 729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0, 730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2, 3, 731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 4, 732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 5, 733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8 // behind last character 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // initial indexes into the src and expect strings 737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // for both sets of test data 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru enum { 739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SRC_MIDDLE=4, 740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru EXPECT_MIDDLE=3, 741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SRC_MIDDLE_2=2, 742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru EXPECT_MIDDLE_2=1 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // movement vector 746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // - for previous(), 0 for current(), + for next() 747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // for both sets of test data 748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char *const moves="0+0+0--0-0-+++0--+++++++0--------"; 
749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TestPreviousNext(src, LENGTHOF(src), 751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expect, LENGTHOF(expect), 752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectIndex, 753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SRC_MIDDLE, EXPECT_MIDDLE, 754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru moves, UNORM_NFD, "basic"); 755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TestPreviousNext(src_j2911, LENGTHOF(src_j2911), 757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expect_j2911, LENGTHOF(expect_j2911), 758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectIndex_j2911, 759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SRC_MIDDLE, EXPECT_MIDDLE, 760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru moves, UNORM_NFKC, "j2911"); 761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // try again from different "middle" indexes 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TestPreviousNext(src, LENGTHOF(src), 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expect, LENGTHOF(expect), 765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectIndex, 766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SRC_MIDDLE_2, EXPECT_MIDDLE_2, 767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru moves, UNORM_NFD, "basic_2"); 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TestPreviousNext(src_j2911, LENGTHOF(src_j2911), 770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expect_j2911, LENGTHOF(expect_j2911), 
771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expectIndex_j2911, 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SRC_MIDDLE_2, EXPECT_MIDDLE_2, 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru moves, UNORM_NFKC, "j2911_2"); 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid BasicNormalizerTest::TestConcatenate() { 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char *const 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cases[][4]={ 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* mode, left, right, result */ 780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "C", 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "re", 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0301sum\\u00e9", 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "r\\u00e9sum\\u00e9" 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }, 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "C", 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "a\\u1100", 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u1161bcdefghijk", 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "a\\uac00bcdefghijk" 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }, 792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* ### TODO: add more interesting cases */ 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 79450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "D", 79550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "\\u03B1\\u0345", 79650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 
"\\u0C4D\\U000110BA\\U0001D169", 79750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "\\u03B1\\U0001D169\\U000110BA\\u0C4D\\u0345" 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString left, right, expect, result, r; 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode; 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UNormalizationMode mode; 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test concatenation */ 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<(int32_t)(sizeof(cases)/sizeof(cases[0])); ++i) { 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch(*cases[i][0]) { 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 'C': mode=UNORM_NFC; break; 810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 'D': mode=UNORM_NFD; break; 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 'c': mode=UNORM_NFKC; break; 812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 'd': mode=UNORM_NFKD; break; 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: mode=UNORM_NONE; break; 814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru left=UnicodeString(cases[i][1], "").unescape(); 817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru right=UnicodeString(cases[i][2], "").unescape(); 818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expect=UnicodeString(cases[i][3], "").unescape(); 
819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //result=r=UnicodeString(); 821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r=Normalizer::concatenate(left, right, result, mode, 0, errorCode); 824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode) || /*result!=r ||*/ result!=expect) { 82550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("error in Normalizer::concatenate(), cases[] fails with "+ 826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString(u_errorName(errorCode))+", result==expect: expected: "+ 827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru hex(expect)+" =========> got: " + hex(result)); 828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test error cases */ 832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* left.getBuffer()==result.getBuffer() */ 834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result=r=expect=UnicodeString("zz", ""); 835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorCode=U_UNEXPECTED_TOKEN; 836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r=Normalizer::concatenate(left, right, result, mode, 0, errorCode); 837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(errorCode!=U_UNEXPECTED_TOKEN || result!=r || !result.isBogus()) { 838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("error in Normalizer::concatenate(), violates UErrorCode protocol"); 
839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru left.setToBogus(); 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r=Normalizer::concatenate(left, right, result, mode, 0, errorCode); 844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || result!=r || !result.isBogus()) { 845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("error in Normalizer::concatenate(), does not detect left.isBogus()"); 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// reference implementation of Normalizer::compare 850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruref_norm_compare(const UnicodeString &s1, const UnicodeString &s2, uint32_t options, UErrorCode &errorCode) { 852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString r1, r2, t1, t2; 853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t normOptions=(int32_t)(options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT); 854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(options&U_COMPARE_IGNORE_CASE) { 856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer::decompose(s1, FALSE, normOptions, r1, errorCode); 857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer::decompose(s2, FALSE, normOptions, r2, errorCode); 858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r1.foldCase(options); 860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r2.foldCase(options); 861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r1=s1; 863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru r2=s2; 864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer::decompose(r1, FALSE, normOptions, t1, errorCode); 867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer::decompose(r2, FALSE, normOptions, t2, errorCode); 868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(options&U_COMPARE_CODE_POINT_ORDER) { 870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return t1.compareCodePointOrder(t2); 871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return t1.compare(t2); 873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// test wrapper for Normalizer::compare, sets UNORM_INPUT_IS_FCD appropriately 877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru_norm_compare(const UnicodeString &s1, const UnicodeString &s2, uint32_t options, UErrorCode &errorCode) { 879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t normOptions=(int32_t)(options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT); 880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( UNORM_YES==Normalizer::quickCheck(s1, UNORM_FCD, normOptions, errorCode) && 882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UNORM_YES==Normalizer::quickCheck(s2, UNORM_FCD, normOptions, errorCode)) { 883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru options|=UNORM_INPUT_IS_FCD; 884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return Normalizer::compare(s1, s2, options, errorCode); 887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// reference implementation of UnicodeString::caseCompare 890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruref_case_compare(const UnicodeString &s1, const UnicodeString &s2, uint32_t options) { 892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString t1, t2; 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t1=s1; 895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t2=s2; 896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t1.foldCase(options); 898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t2.foldCase(options); 899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(options&U_COMPARE_CODE_POINT_ORDER) { 901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return t1.compareCodePointOrder(t2); 902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 
903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return t1.compare(t2); 904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// reduce an integer to -1/0/1 908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline int32_t 909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru_sign(int32_t value) { 910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(value==0) { 911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (value>>31)|1; 914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const char * 918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru_signString(int32_t value) { 919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(value<0) { 920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return "<0"; 921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(value==0) { 922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return "=0"; 923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else /* value>0 */ { 924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return ">0"; 925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid 
929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruBasicNormalizerTest::TestCompare() { 930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test Normalizer::compare and unorm_compare (thinly wrapped by the former) 931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // by comparing it with its semantic equivalent 932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // since we trust the pieces, this is sufficient 933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test each string with itself and each other 935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // each time with all options 936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const char *const 937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strings[]={ 938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // some cases from NormalizationTest.txt 939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 0..3 940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "D\\u031B\\u0307\\u0323", 941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u1E0C\\u031B\\u0307", 942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "D\\u031B\\u0323\\u0307", 943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "d\\u031B\\u0323\\u0307", 944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 4..6 946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u00E4", 947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "a\\u0308", 948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "A\\u0308", 949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Angstrom sign = A ring 951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 
7..10 952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u212B", 953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u00C5", 954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "A\\u030A", 955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "a\\u030A", 956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 11.14 958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "a\\u059A\\u0316\\u302A\\u032Fb", 959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "a\\u302A\\u0316\\u032F\\u059Ab", 960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "a\\u302A\\u0316\\u032F\\u059Ab", 961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "A\\u059A\\u0316\\u302A\\u032Fb", 962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // from ICU case folding tests 964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 15..20 965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "A\\u00df\\u00b5\\ufb03\\U0001040c\\u0131", 966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ass\\u03bcffi\\U00010434i", 967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0061\\u0042\\u0131\\u03a3\\u00df\\ufb03\\ud93f\\udfff", 968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0041\\u0062\\u0069\\u03c3\\u0073\\u0053\\u0046\\u0066\\u0049\\ud93f\\udfff", 969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0041\\u0062\\u0131\\u03c3\\u0053\\u0073\\u0066\\u0046\\u0069\\ud93f\\udfff", 970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0041\\u0062\\u0069\\u03c3\\u0073\\u0053\\u0046\\u0066\\u0049\\ud93f\\udffd", 971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // U+d800 U+10001 see implementation comment in 
unorm_cmpEquivFold 973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // vs. U+10000 at bottom - code point order 974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 21..22 975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\ud800\\ud800\\udc01", 976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\ud800\\udc00", 977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // other code point order tests from ustrtest.cpp 979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 23..31 980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u20ac\\ud801", 981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u20ac\\ud800\\udc00", 982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\ud800", 983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\ud800\\uff61", 984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\udfff", 985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\uff61\\udfff", 986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\uff61\\ud800\\udc02", 987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\ud800\\udc02", 988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\ud84d\\udc56", 989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // long strings, see cnormtst.c/TestNormCoverage() 991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // equivalent if case-insensitive 992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 32..33 993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\uAD8B\\uAD8B\\uAD8B\\uAD8B" 994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e" 995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
"\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e" 996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e" 997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e" 998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e" 999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "aaaaaaaaaaaaaaaaaazzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" 1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" 1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc" 1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd" 1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\uAD8B\\uAD8B\\uAD8B\\uAD8B" 1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "d\\u031B\\u0307\\u0323", 1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u1100\\u116f\\u11aa\\uAD8B\\uAD8B\\u1100\\u116f\\u11aa" 1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e" 1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e" 1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e" 1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
"\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e" 1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\U0001d15e\\U0001d157\\U0001d165\\U0001d15e\\U0001d15e\\U0001d15e\\U0001d15e" 1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "aaaaaaaaaaAAAAAAAAZZZZZZZZZZZZZZZZzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" 1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" 1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc" 1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd" 1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u1100\\u116f\\u11aa\\uAD8B\\uAD8B\\u1100\\u116f\\u11aa" 1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u1E0C\\u031B\\u0307", 1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // some strings that may make a difference whether the compare function 1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // case-folds or decomposes first 1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 34..41 1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0360\\u0345\\u0334", 1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0360\\u03b9\\u0334", 1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0360\\u1f80\\u0334", 1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0360\\u03b1\\u0313\\u03b9\\u0334", 1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0360\\u1ffc\\u0334", 
1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0360\\u03c9\\u03b9\\u0334", 1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "a\\u0360\\u0345\\u0360\\u0345b", 1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "a\\u0345\\u0360\\u0345\\u0360b", 1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // interesting cases for canonical caseless match with turkic i handling 1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 42..43 1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u00cc", 1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0069\\u0300", 1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // strings with post-Unicode 3.2 normalization or normalization corrections 1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 44..45 1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u00e4\\u193b\\U0002f868", 1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "\\u0061\\u193b\\u0308\\u36fc", 1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // empty string 1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 46 1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "" 1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString s[100]; // at least as many items as in strings[] ! 
1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // all combinations of options 1052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // UNORM_INPUT_IS_FCD is set automatically if both input strings fulfill FCD conditions 1053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // set UNORM_UNICODE_3_2 in one additional combination 1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const struct { 1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t options; 1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *name; 1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } opt[]={ 1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 0, "default" }, 1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { U_COMPARE_CODE_POINT_ORDER, "c.p. order" }, 1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { U_COMPARE_IGNORE_CASE, "ignore case" }, 1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { U_COMPARE_CODE_POINT_ORDER|U_COMPARE_IGNORE_CASE, "c.p. order & ignore case" }, 1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { U_COMPARE_IGNORE_CASE|U_FOLD_CASE_EXCLUDE_SPECIAL_I, "ignore case & special i" }, 1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { U_COMPARE_CODE_POINT_ORDER|U_COMPARE_IGNORE_CASE|U_FOLD_CASE_EXCLUDE_SPECIAL_I, "c.p. 
order & ignore case & special i" }, 1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { UNORM_UNICODE_3_2<<UNORM_COMPARE_NORM_OPTIONS_SHIFT, "Unicode 3.2" } 1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i, j, k, count=LENGTHOF(strings); 1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t result, refResult; 1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode; 1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // create the UnicodeStrings 1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<count; ++i) { 1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s[i]=UnicodeString(strings[i], "").unescape(); 1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test them each with each other 1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<count; ++i) { 1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(j=i; j<count; ++j) { 1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(k=0; k<LENGTHOF(opt); ++k) { 1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test Normalizer::compare 1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result=_norm_compare(s[i], s[j], opt[k].options, errorCode); 1084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru refResult=ref_norm_compare(s[i], s[j], opt[k].options, errorCode); 
1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(_sign(result)!=_sign(refResult)) { 1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Normalizer::compare(%d, %d, %s)%s should be %s %s", 1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i, j, opt[k].name, _signString(result), _signString(refResult), 1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_SUCCESS(errorCode) ? "" : u_errorName(errorCode)); 1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test UnicodeString::caseCompare - same internal implementation function 1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(opt[k].options&U_COMPARE_IGNORE_CASE) { 1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result=s[i].caseCompare(s[j], opt[k].options); 1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru refResult=ref_case_compare(s[i], s[j], opt[k].options); 1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(_sign(result)!=_sign(refResult)) { 1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("UniStr::caseCompare(%d, %d, %s)%s should be %s %s", 1098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i, j, opt[k].name, _signString(result), _signString(refResult), 1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_SUCCESS(errorCode) ? 
"" : u_errorName(errorCode)); 1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test cases with i and I to make sure Turkic works 1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const UChar iI[]={ 0x49, 0x69, 0x130, 0x131 }; 110827f654740f2a26ad62a5c155af9199af9e69b889claireho UnicodeSet iSet, set; 1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString s1, s2; 111127f654740f2a26ad62a5c155af9199af9e69b889claireho 111227f654740f2a26ad62a5c155af9199af9e69b889claireho const Normalizer2Impl *nfcImpl=Normalizer2Factory::getNFCImpl(errorCode); 111327f654740f2a26ad62a5c155af9199af9e69b889claireho if(U_FAILURE(errorCode) || !nfcImpl->ensureCanonIterData(errorCode)) { 111427f654740f2a26ad62a5c155af9199af9e69b889claireho dataerrln("Normalizer2Factory::getNFCImpl().ensureCanonIterData() failed: %s", 111527f654740f2a26ad62a5c155af9199af9e69b889claireho u_errorName(errorCode)); 111627f654740f2a26ad62a5c155af9199af9e69b889claireho return; 111727f654740f2a26ad62a5c155af9199af9e69b889claireho } 1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // collect all sets into one for contiguous output 1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<LENGTHOF(iI); ++i) { 112127f654740f2a26ad62a5c155af9199af9e69b889claireho if(nfcImpl->getCanonStartSet(iI[i], iSet)) { 112227f654740f2a26ad62a5c155af9199af9e69b889claireho set.addAll(iSet); 
1123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test all of these precomposed characters 112727f654740f2a26ad62a5c155af9199af9e69b889claireho const Normalizer2 *nfcNorm2=Normalizer2Factory::getNFCInstance(errorCode); 1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSetIterator it(set); 112927f654740f2a26ad62a5c155af9199af9e69b889claireho while(it.next() && !it.isString()) { 113027f654740f2a26ad62a5c155af9199af9e69b889claireho UChar32 c=it.getCodepoint(); 113127f654740f2a26ad62a5c155af9199af9e69b889claireho if(!nfcNorm2->getDecomposition(c, s2)) { 113227f654740f2a26ad62a5c155af9199af9e69b889claireho dataerrln("NFC.getDecomposition(i-composite U+%04lx) failed", (long)c); 113327f654740f2a26ad62a5c155af9199af9e69b889claireho return; 113427f654740f2a26ad62a5c155af9199af9e69b889claireho } 113527f654740f2a26ad62a5c155af9199af9e69b889claireho 113627f654740f2a26ad62a5c155af9199af9e69b889claireho s1.setTo(c); 113727f654740f2a26ad62a5c155af9199af9e69b889claireho for(k=0; k<LENGTHOF(opt); ++k) { 113827f654740f2a26ad62a5c155af9199af9e69b889claireho // test Normalizer::compare 1139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 114027f654740f2a26ad62a5c155af9199af9e69b889claireho result=_norm_compare(s1, s2, opt[k].options, errorCode); 114127f654740f2a26ad62a5c155af9199af9e69b889claireho refResult=ref_norm_compare(s1, s2, opt[k].options, errorCode); 114227f654740f2a26ad62a5c155af9199af9e69b889claireho if(_sign(result)!=_sign(refResult)) { 114327f654740f2a26ad62a5c155af9199af9e69b889claireho errln("Normalizer::compare(U+%04x with its NFD, %s)%s should be %s %s", 114427f654740f2a26ad62a5c155af9199af9e69b889claireho c, opt[k].name, _signString(result), _signString(refResult), 
114527f654740f2a26ad62a5c155af9199af9e69b889claireho U_SUCCESS(errorCode) ? "" : u_errorName(errorCode)); 1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 114827f654740f2a26ad62a5c155af9199af9e69b889claireho // test UnicodeString::caseCompare - same internal implementation function 114927f654740f2a26ad62a5c155af9199af9e69b889claireho if(opt[k].options&U_COMPARE_IGNORE_CASE) { 1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 115127f654740f2a26ad62a5c155af9199af9e69b889claireho result=s1.caseCompare(s2, opt[k].options); 115227f654740f2a26ad62a5c155af9199af9e69b889claireho refResult=ref_case_compare(s1, s2, opt[k].options); 1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(_sign(result)!=_sign(refResult)) { 115427f654740f2a26ad62a5c155af9199af9e69b889claireho errln("UniStr::caseCompare(U+%04x with its NFD, %s)%s should be %s %s", 115527f654740f2a26ad62a5c155af9199af9e69b889claireho c, opt[k].name, _signString(result), _signString(refResult), 1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_SUCCESS(errorCode) ? 
"" : u_errorName(errorCode)); 1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 116127f654740f2a26ad62a5c155af9199af9e69b889claireho 116227f654740f2a26ad62a5c155af9199af9e69b889claireho // test getDecomposition() for some characters that do not decompose 116327f654740f2a26ad62a5c155af9199af9e69b889claireho if( nfcNorm2->getDecomposition(0x20, s2) || 116427f654740f2a26ad62a5c155af9199af9e69b889claireho nfcNorm2->getDecomposition(0x4e00, s2) || 116527f654740f2a26ad62a5c155af9199af9e69b889claireho nfcNorm2->getDecomposition(0x20002, s2) 116627f654740f2a26ad62a5c155af9199af9e69b889claireho ) { 116727f654740f2a26ad62a5c155af9199af9e69b889claireho errln("NFC.getDecomposition() returns TRUE for characters which do not have decompositions"); 116827f654740f2a26ad62a5c155af9199af9e69b889claireho } 116927f654740f2a26ad62a5c155af9199af9e69b889claireho 117083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // test getRawDecomposition() for some characters that do not decompose 117183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if( nfcNorm2->getRawDecomposition(0x20, s2) || 117283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius nfcNorm2->getRawDecomposition(0x4e00, s2) || 117383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius nfcNorm2->getRawDecomposition(0x20002, s2) 117483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius ) { 117583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius errln("NFC.getRawDecomposition() returns TRUE for characters which do not have decompositions"); 117683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 117783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 117883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // test composePair() for some pairs of characters that do not compose 
117983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if( nfcNorm2->composePair(0x20, 0x301)>=0 || 118083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius nfcNorm2->composePair(0x61, 0x305)>=0 || 118183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius nfcNorm2->composePair(0x1100, 0x1160)>=0 || 118283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius nfcNorm2->composePair(0xac00, 0x11a7)>=0 118383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius ) { 118483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius errln("NFC.composePair() incorrectly composes some pairs of characters"); 118583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 118683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 118727f654740f2a26ad62a5c155af9199af9e69b889claireho // test FilteredNormalizer2::getDecomposition() 118827f654740f2a26ad62a5c155af9199af9e69b889claireho UnicodeSet filter(UNICODE_STRING_SIMPLE("[^\\u00a0-\\u00ff]"), errorCode); 118927f654740f2a26ad62a5c155af9199af9e69b889claireho FilteredNormalizer2 fn2(*nfcNorm2, filter); 119027f654740f2a26ad62a5c155af9199af9e69b889claireho if( fn2.getDecomposition(0xe4, s1) || !fn2.getDecomposition(0x100, s2) || 119127f654740f2a26ad62a5c155af9199af9e69b889claireho s2.length()!=2 || s2[0]!=0x41 || s2[1]!=0x304 119227f654740f2a26ad62a5c155af9199af9e69b889claireho ) { 119327f654740f2a26ad62a5c155af9199af9e69b889claireho errln("FilteredNormalizer2(NFC, ^A0-FF).getDecomposition() failed"); 119427f654740f2a26ad62a5c155af9199af9e69b889claireho } 119583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 119683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // test FilteredNormalizer2::getRawDecomposition() 119783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if( fn2.getRawDecomposition(0xe4, s1) || !fn2.getRawDecomposition(0x100, s2) || 119883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius s2.length()!=2 || s2[0]!=0x41 || s2[1]!=0x304 119983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius ) { 
120083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius errln("FilteredNormalizer2(NFC, ^A0-FF).getRawDecomposition() failed"); 120183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 120283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 120383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius // test FilteredNormalizer2::composePair() 120483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if( 0x100!=fn2.composePair(0x41, 0x304) || 120583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius fn2.composePair(0xc7, 0x301)>=0 // unfiltered result: U+1E08 120683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius ) { 120783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius errln("FilteredNormalizer2(NFC, ^A0-FF).composePair() failed"); 120883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 

/**
 * Verify that case-folding does not un-FCD strings.
 *
 * Iterates over all code points U+0000..U+10FFFF, skipping unassigned code
 * points and the hard-coded Hangul and Han ranges below. For each remaining
 * code point it compares the leading/trailing combining classes of the NFD
 * form before and after case folding, and runs an FCD quick check on the
 * folded string.
 *
 * @param foldingOptions options passed to UnicodeString::foldCase()
 * @return the number of code points that were reported as problematic
 *         (including code points for which a normalization call failed)
 */
int32_t
BasicNormalizerTest::countFoldFCDExceptions(uint32_t foldingOptions) {
    UnicodeString s, d;
    UChar32 c;
    int32_t count;
    uint8_t cc, trailCC, foldCC, foldTrailCC;
    UNormalizationCheckResult qcResult;
    int8_t category;
    UBool isNFD;
    UErrorCode errorCode;

    // The message formatters are printf-style: cast every argument that is
    // printed with an 'l' length modifier so that it really has type long.
    logln("Test if case folding may un-FCD a string (folding options %04lx)",
          (unsigned long)foldingOptions);

    count=0;
    for(c=0; c<=0x10ffff; ++c) {
        errorCode = U_ZERO_ERROR;
        category=u_charType(c);
        if(category==U_UNASSIGNED) {
            continue; // skip unassigned code points
        }
        // Each range skip sets c to the last code point of the range;
        // the loop's ++c then moves past it.
        if(c==0xac00) {
            c=0xd7a3; // skip Hangul - no case folding there
            continue;
        }
        // skip Han blocks - no case folding there either
        if(c==0x3400) {
            c=0x4db5;
            continue;
        }
        if(c==0x4e00) {
            c=0x9fa5;
            continue;
        }
        if(c==0x20000) {
            c=0x2a6d6;
            continue;
        }

        s.setTo(c);

        // get leading and trailing cc for c
        Normalizer::decompose(s, FALSE, 0, d, errorCode);
        isNFD= s==d;
        cc=u_getCombiningClass(d.char32At(0));
        trailCC=u_getCombiningClass(d.char32At(d.length()-1));

        // get leading and trailing cc for the case-folding of c
        s.foldCase(foldingOptions);
        Normalizer::decompose(s, FALSE, 0, d, errorCode);
        foldCC=u_getCombiningClass(d.char32At(0));
        foldTrailCC=u_getCombiningClass(d.char32At(d.length()-1));

        qcResult=Normalizer::quickCheck(s, UNORM_FCD, errorCode);

        if (U_FAILURE(errorCode)) {
            ++count;
            // The code point argument was missing here, so "%04lx" consumed the
            // error-name pointer and "%s" read an argument that was never passed.
            dataerrln("U+%04lx: Failed with error %s", (long)c, u_errorName(errorCode));
            // The values computed above are not meaningful after a failure;
            // do not evaluate them (and do not count this code point twice).
            continue;
        }

        // bad:
        // - character maps to empty string: adjacent characters may then need reordering
        // - folding has different leading/trailing cc's, and they don't become just 0
        // - folding itself is not FCD
        if( qcResult!=UNORM_YES ||
            s.isEmpty() ||
            (cc!=foldCC && foldCC!=0) || (trailCC!=foldTrailCC && foldTrailCC!=0)
        ) {
            ++count;
            dataerrln("U+%04lx: case-folding may un-FCD a string (folding options %04lx)",
                      (long)c, (unsigned long)foldingOptions);
            dataerrln(" cc %02x trailCC %02x foldCC(U+%04lx) %02x foldTrailCC(U+%04lx) %02x quickCheck(folded)=%d",
                      cc, trailCC,
                      (long)d.char32At(0), foldCC,
                      (long)d.char32At(d.length()-1), foldTrailCC,
                      (int)qcResult);
            continue;
        }

        // also bad:
        // if a code point is in NFD but its case folding is not, then
        // unorm_compare will also fail
        if(isNFD && UNORM_YES!=Normalizer::quickCheck(s, UNORM_NFD, errorCode)) {
            ++count;
            errln("U+%04lx: case-folding un-NFDs this character (folding options %04lx)",
                  (long)c, (unsigned long)foldingOptions);
        }
    }

    logln("There are %ld code points for which case-folding may un-FCD a string (folding options %04lx)",
          (long)count, (unsigned long)foldingOptions);
    return count;
}

/**
 * Runs countFoldFCDExceptions() with the default folding options and with
 * U_FOLD_CASE_EXCLUDE_SPECIAL_I, and reports an error if either run found
 * any exceptions.
 */
void
BasicNormalizerTest::FindFoldFCDExceptions() {
    int32_t count;

    count=countFoldFCDExceptions(0);
    count+=countFoldFCDExceptions(U_FOLD_CASE_EXCLUDE_SPECIAL_I);
    if(count>0) {
        /*
         * If case-folding un-FCDs any strings, then unorm_compare() must be
         * re-implemented.
         * It currently assumes that one can check for FCD then case-fold
         * and then still have FCD strings for raw decomposition without reordering.
         */
        dataerrln("error: There are %ld code points for which case-folding may un-FCD a string for all folding options.\n"
                  "See comment in BasicNormalizerTest::FindFoldFCDExceptions()!", (long)count);
    }
}

/**
 * Builds the expected "skippable" sets for NFD, NFC, NFKD and NFKC from
 * Unicode property patterns, then removes from the NFC and NFKC sets every
 * character that stops being NFC-normalized when one of the
 * [:NFC_QC=Maybe:] characters is appended to it.
 *
 * @param skipSets  output array indexed by UNormalizationMode; only the
 *                  UNORM_NFD, UNORM_NFC, UNORM_NFKD and UNORM_NFKC entries are written
 * @param errorCode in/out ICU error code; on failure the function returns
 *                  early and the sets may be only partially built
 */
static void
initExpectedSkippables(UnicodeSet skipSets[UNORM_MODE_COUNT], UErrorCode &errorCode) {
    skipSets[UNORM_NFD].applyPattern(
        UNICODE_STRING_SIMPLE("[[:NFD_QC=Yes:]&[:ccc=0:]]"), errorCode);
    skipSets[UNORM_NFC].applyPattern(
        UNICODE_STRING_SIMPLE("[[:NFC_QC=Yes:]&[:ccc=0:]-[:HST=LV:]]"), errorCode);
    skipSets[UNORM_NFKD].applyPattern(
        UNICODE_STRING_SIMPLE("[[:NFKD_QC=Yes:]&[:ccc=0:]]"), errorCode);
    skipSets[UNORM_NFKC].applyPattern(
        UNICODE_STRING_SIMPLE("[[:NFKC_QC=Yes:]&[:ccc=0:]-[:HST=LV:]]"), errorCode);

    // Remove from the NFC and NFKC sets all those characters that change
    // when a back-combining character is added.
    // First, get all of the back-combining characters and their combining classes.
    UnicodeSet combineBack("[:NFC_QC=Maybe:]", errorCode);
    if(U_FAILURE(errorCode)) {
        return;
    }
    // Flat table of (code point, combining class) pairs, owned by LocalArray so
    // that it is released on every return path.
    LocalArray<int32_t> combineBackCharsAndCc(new int32_t[combineBack.size()*2]);
    int32_t numCombineBack=0;
    UnicodeSetIterator iter(combineBack);
    // Count only what the iterator actually delivers instead of trusting size().
    while(iter.next() && !iter.isString()) {
        UChar32 c=iter.getCodepoint();
        combineBackCharsAndCc[2*numCombineBack]=c;
        combineBackCharsAndCc[2*numCombineBack+1]=u_getCombiningClass(c);
        ++numCombineBack;
    }

    // We need not look at control codes, Han characters nor Hangul LVT syllables because they
    // do not combine forward. LV syllables are already removed.
    UnicodeSet notInteresting("[[:C:][:Unified_Ideograph:][:HST=LVT:]]", errorCode);
    // Work on a copy: skipSets[UNORM_NFC] is modified while we iterate.
    UnicodeSet unsure(skipSets[UNORM_NFC]);
    unsure.removeAll(notInteresting);
    // System.out.format("unsure.size()=%d\n", unsure.size());

    // For each character about which we are unsure, see if it changes when we add
    // one of the back-combining characters.
    const Normalizer2 *norm2=Normalizer2::getNFCInstance(errorCode);
    if(U_FAILURE(errorCode)) {
        return;  // norm2 must not be dereferenced after a failure
    }
    UnicodeString s;
    iter.reset(unsure);
    while(iter.next()) {
        UChar32 c=iter.getCodepoint();
        s.setTo(c);
        int32_t cLength=s.length();
        int32_t tccc=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
        for(int32_t i=0; i<numCombineBack; ++i) {
            // If c's decomposition ends with a character with non-zero combining class, then
            // c can only change if it combines with a character with a non-zero combining class.
            int32_t cc2=combineBackCharsAndCc[2*i+1];
            if(tccc==0 || cc2!=0) {
                UChar32 c2=combineBackCharsAndCc[2*i];
                s.append(c2);
                if(!norm2->isNormalized(s, errorCode)) {
                    // System.out.format("remove U+%04x (tccc=%d) + U+%04x (cc=%d)\n", c, tccc, c2, cc2);
                    skipSets[UNORM_NFC].remove(c);
                    skipSets[UNORM_NFKC].remove(c);
                    break;
                }
                s.truncate(cLength);  // drop c2 again before trying the next candidate
            }
        }
    }
}

/**
 * Compares the runtime [:NF*_Inert:] property sets against the expected
 * sets computed by initExpectedSkippables() and reports both set
 * differences for every mode in which they disagree.
 */
void
BasicNormalizerTest::TestSkippable() {
    UnicodeSet diff, skipSets[UNORM_MODE_COUNT], expectSets[UNORM_MODE_COUNT];
    UnicodeString s, pattern;

    /* build NF*Skippable sets from runtime data */
    IcuTestErrorCode errorCode(*this, "TestSkippable");
    skipSets[UNORM_NFD].applyPattern(UNICODE_STRING_SIMPLE("[:NFD_Inert:]"), errorCode);
    skipSets[UNORM_NFKD].applyPattern(UNICODE_STRING_SIMPLE("[:NFKD_Inert:]"), errorCode);
    skipSets[UNORM_NFC].applyPattern(UNICODE_STRING_SIMPLE("[:NFC_Inert:]"), errorCode);
    skipSets[UNORM_NFKC].applyPattern(UNICODE_STRING_SIMPLE("[:NFKC_Inert:]"), errorCode);
    if(errorCode.logDataIfFailureAndReset("UnicodeSet(NF..._Inert) failed")) {
        return;
    }

    /* get expected sets from hardcoded patterns */
    initExpectedSkippables(expectSets, errorCode);
    errorCode.assertSuccess();

    for(int32_t i=UNORM_NONE; i<UNORM_MODE_COUNT; ++i) {
        if(skipSets[i]!=expectSets[i]) {
            errln("error: TestSkippable skipSets[%d]!=expectedSets[%d]\n", i, i);
            // Note: This used to depend on hardcoded UnicodeSet patterns generated by
            // Mark's unicodetools.com.ibm.text.UCD.NFSkippable, by
            // running com.ibm.text.UCD.Main with the option NFSkippable.
            // Since ICU 4.6/Unicode 6, we are generating the
            // expectSets ourselves in initExpectedSkippables().

            s=UNICODE_STRING_SIMPLE("skip-expect=");
            (diff=skipSets[i]).removeAll(expectSets[i]).toPattern(pattern, TRUE);
            s.append(pattern);

            pattern.remove();
            s.append(UNICODE_STRING_SIMPLE("\n\nexpect-skip="));
            (diff=expectSets[i]).removeAll(skipSets[i]).toPattern(pattern, TRUE);
            s.append(pattern);
            s.append(UNICODE_STRING_SIMPLE("\n\n"));

            errln(s);
        }
    }
}

// One test case: an escaped input string and the escaped expected result.
struct StringPair { const char *input, *expected; };

/**
 * Loads the "testnorm" custom normalization data in UNORM2_COMPOSE mode and
 * checks that each input in pairs[] normalizes to its expected string.
 */
void
BasicNormalizerTest::TestCustomComp() {
    static const StringPair pairs[]={
        { "\\uD801\\uE000\\uDFFE", "" },
        { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD7FF\\uFFFF" },
        { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD7FF\\U000107FE\\uFFFF" },
        { "\\uE001\\U000110B9\\u0345\\u0308\\u0327", "\\uE002\\U000110B9\\u0327\\u0345" },
        { "\\uE010\\U000F0011\\uE012", "\\uE011\\uE012" },
        { "\\uE010\\U000F0011\\U000F0011\\uE012", "\\uE011\\U000F0010" },
        { "\\uE111\\u1161\\uE112\\u1162", "\\uAE4C\\u1102\\u0062\\u1162" },
        { "\\uFFF3\\uFFF7\\U00010036\\U00010077", "\\U00010037\\U00010037\\uFFF6\\U00010037" }
    };
    IcuTestErrorCode errorCode(*this, "BasicNormalizerTest/TestCustomComp");
    const Normalizer2 *customNorm2=
        Normalizer2::getInstance(loadTestData(errorCode), "testnorm",
                                 UNORM2_COMPOSE, errorCode);
    if(errorCode.logDataIfFailureAndReset("unable to load testdata/testnorm.nrm")) {
        return;
    }
    for(int32_t i=0; i<LENGTHOF(pairs); ++i) {
        const StringPair &pair=pairs[i];
        // The table stores backslash escapes; unescape() turns them into code units.
        UnicodeString input=UnicodeString(pair.input, -1, US_INV).unescape();
        UnicodeString expected=UnicodeString(pair.expected, -1, US_INV).unescape();
        UnicodeString result=customNorm2->normalize(input, errorCode);
        if(result!=expected) {
            errln("custom compose Normalizer2 did not normalize input %d as expected", i);
        }
    }
}
145150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 145250294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehovoid 145350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoBasicNormalizerTest::TestCustomFCC() { 145450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho static const StringPair pairs[]={ 145550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { "\\uD801\\uE000\\uDFFE", "" }, 145650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { "\\uD800\\uD801\\uE000\\uDFFE\\uDFFF", "\\uD7FF\\uFFFF" }, 145750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { "\\uD800\\uD801\\uDFFE\\uDFFF", "\\uD7FF\\U000107FE\\uFFFF" }, 145850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // The following expected result is different from CustomComp 145950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // because of only-contiguous composition. 
146050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { "\\uE001\\U000110B9\\u0345\\u0308\\u0327", "\\uE001\\U000110B9\\u0327\\u0308\\u0345" }, 146150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { "\\uE010\\U000F0011\\uE012", "\\uE011\\uE012" }, 146250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { "\\uE010\\U000F0011\\U000F0011\\uE012", "\\uE011\\U000F0010" }, 146350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { "\\uE111\\u1161\\uE112\\u1162", "\\uAE4C\\u1102\\u0062\\u1162" }, 146450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho { "\\uFFF3\\uFFF7\\U00010036\\U00010077", "\\U00010037\\U00010037\\uFFF6\\U00010037" } 146550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho }; 146650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho IcuTestErrorCode errorCode(*this, "BasicNormalizerTest/TestCustomFCC"); 146750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const Normalizer2 *customNorm2= 146850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho Normalizer2::getInstance(loadTestData(errorCode), "testnorm", 146950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UNORM2_COMPOSE_CONTIGUOUS, errorCode); 147027f654740f2a26ad62a5c155af9199af9e69b889claireho if(errorCode.logDataIfFailureAndReset("unable to load testdata/testnorm.nrm")) { 147150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 147250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 147350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(int32_t i=0; i<LENGTHOF(pairs); ++i) { 147450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho const StringPair &pair=pairs[i]; 147550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString input=UnicodeString(pair.input, -1, US_INV).unescape(); 147650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString expected=UnicodeString(pair.expected, -1, US_INV).unescape(); 147750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString result=customNorm2->normalize(input, errorCode); 147850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(result!=expected) { 
147950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("custom FCC Normalizer2 did not normalize input %d as expected", i); 148050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 148150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 148250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho} 148350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 148427f654740f2a26ad62a5c155af9199af9e69b889claireho/* Improve code coverage of Normalizer2 */ 148527f654740f2a26ad62a5c155af9199af9e69b889clairehovoid 148627f654740f2a26ad62a5c155af9199af9e69b889clairehoBasicNormalizerTest::TestFilteredNormalizer2Coverage() { 148727f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode errorCode = U_ZERO_ERROR; 148827f654740f2a26ad62a5c155af9199af9e69b889claireho const Normalizer2 *nfcNorm2=Normalizer2Factory::getNFCInstance(errorCode); 148927f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(errorCode)) { 149027f654740f2a26ad62a5c155af9199af9e69b889claireho dataerrln("Normalizer2Factory::getNFCInstance() call failed - %s", u_errorName(status)); 149127f654740f2a26ad62a5c155af9199af9e69b889claireho return; 149227f654740f2a26ad62a5c155af9199af9e69b889claireho } 149383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UnicodeSet filter(UNICODE_STRING_SIMPLE("[^\\u00a0-\\u00ff\\u0310-\\u031f]"), errorCode); 149427f654740f2a26ad62a5c155af9199af9e69b889claireho FilteredNormalizer2 fn2(*nfcNorm2, filter); 149527f654740f2a26ad62a5c155af9199af9e69b889claireho 149627f654740f2a26ad62a5c155af9199af9e69b889claireho UChar32 char32 = 0x0054; 149727f654740f2a26ad62a5c155af9199af9e69b889claireho 149827f654740f2a26ad62a5c155af9199af9e69b889claireho if (fn2.isInert(char32)) { 149927f654740f2a26ad62a5c155af9199af9e69b889claireho errln("FilteredNormalizer2.isInert() failed."); 150027f654740f2a26ad62a5c155af9199af9e69b889claireho } 150127f654740f2a26ad62a5c155af9199af9e69b889claireho 150227f654740f2a26ad62a5c155af9199af9e69b889claireho if (fn2.hasBoundaryAfter(char32)) { 
150327f654740f2a26ad62a5c155af9199af9e69b889claireho errln("FilteredNormalizer2.hasBoundaryAfter() failed."); 150427f654740f2a26ad62a5c155af9199af9e69b889claireho } 150527f654740f2a26ad62a5c155af9199af9e69b889claireho 150683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UChar32 c; 150783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius for(c=0; c<=0x3ff; ++c) { 150883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius uint8_t expectedCC= filter.contains(c) ? nfcNorm2->getCombiningClass(c) : 0; 150983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius uint8_t cc=fn2.getCombiningClass(c); 151083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(cc!=expectedCC) { 151183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius errln( 151283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UnicodeString("FilteredNormalizer2(NFC, ^A0-FF,310-31F).getCombiningClass(U+")+ 151383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius hex(c)+ 151483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius ")==filtered NFC.getCC()"); 151583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 151683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius } 151783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 151883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UnicodeString newString1 = UNICODE_STRING_SIMPLE("[^\\u0100-\\u01ff]"); 151983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius UnicodeString newString2 = UNICODE_STRING_SIMPLE("[^\\u0200-\\u02ff]"); 152027f654740f2a26ad62a5c155af9199af9e69b889claireho fn2.append(newString1, newString2, errorCode); 152127f654740f2a26ad62a5c155af9199af9e69b889claireho if (U_FAILURE(errorCode)) { 152227f654740f2a26ad62a5c155af9199af9e69b889claireho errln("FilteredNormalizer2.append() failed."); 152327f654740f2a26ad62a5c155af9199af9e69b889claireho } 152427f654740f2a26ad62a5c155af9199af9e69b889claireho} 152527f654740f2a26ad62a5c155af9199af9e69b889claireho 
1526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_NORMALIZATION */ 1527