1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru************************************************************************ 350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* Copyright (c) 1997-2010, International Business Machines 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru************************************************************************ 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_NORMALIZATION 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h" 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/normlzr.h" 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uniset.h" 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/putil.h" 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h" 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "filestrm.h" 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "normconf.h" 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h> 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define ARRAY_LENGTH(array) (sizeof(array) / sizeof(array[0])) 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#define CASE(id,test,exec) case id: \ 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name = #test; \ 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (exec) { \ 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln(#test "---"); \ 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)""); \ 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru test(); \ 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } \ 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid NormalizerConformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/) { 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (index) { 3450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho CASE(0, TestConformance, exec); 3550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION 3650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho CASE(1, TestConformance32, exec); 3750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // CASE(2, TestCase6); 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru default: name = ""; break; 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define FIELD_COUNT 5 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizerConformanceTest::NormalizerConformanceTest() : 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru normalizer(UnicodeString(), UNORM_NFC) {} 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizerConformanceTest::~NormalizerConformanceTest() {} 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// more interesting conformance test cases, not in the unicode.org NormalizationTest.txt 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const char *moreCases[]={ 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Markus 2001aug30 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "0061 0332 0308;00E4 0332;0061 0332 0308;00E4 0332;0061 0332 0308; # Markus 0", 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Markus 2001oct26 - test edge case for iteration: U+0f73.cc==0 but decomposition.lead.cc==129 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "0061 0301 0F73;00E1 0F71 0F72;0061 0F71 0F72 0301;00E1 0F71 0F72;0061 0F71 0F72 0301; # Markus 1" 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid NormalizerConformanceTest::compare(const UnicodeString& s1, const UnicodeString& s2){ 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status=U_ZERO_ERROR; 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // TODO: Re-enable this tests after UTC fixes UAX 21 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s1.indexOf((UChar32)0x0345)>=0)return; 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(Normalizer::compare(s1,s2,U_FOLD_CASE_DEFAULT,status)!=0){ 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Normalizer::compare() failed for s1: " + prettify(s1) + " s2: " +prettify(s2)); 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruFileStream * 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruNormalizerConformanceTest::openNormalizationTestFile(const char *filename) { 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char unidataPath[2000]; 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *folder; 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru FileStream *input; 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode; 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // look inside ICU_DATA first 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru folder=pathToDataDirectory(); 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(folder!=NULL) { 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcpy(unidataPath, folder); 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcat(unidataPath, "unidata" U_FILE_SEP_STRING); 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcat(unidataPath, filename); 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru input=T_FileStream_open(unidataPath, "rb"); 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(input!=NULL) { 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return input; 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // find icu/source/data/unidata relative to the test data 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru folder=loadTestData(errorCode); 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_SUCCESS(errorCode)) { 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcpy(unidataPath, folder); 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcat(unidataPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_FILE_SEP_STRING "data" U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING); 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcat(unidataPath, filename); 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru input=T_FileStream_open(unidataPath, "rb"); 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(input!=NULL) { 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return input; 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // look in icu/source/test/testdata/out/build 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru folder=loadTestData(errorCode); 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_SUCCESS(errorCode)) { 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcpy(unidataPath, folder); 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcat(unidataPath, U_FILE_SEP_STRING); 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcat(unidataPath, filename); 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru input=T_FileStream_open(unidataPath, "rb"); 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(input!=NULL) { 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return input; 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // look in icu/source/test/testdata 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru folder=loadTestData(errorCode); 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_SUCCESS(errorCode)) { 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcpy(unidataPath, folder); 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcat(unidataPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING); 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcat(unidataPath, filename); 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru input=T_FileStream_open(unidataPath, "rb"); 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(input!=NULL) { 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return input; 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // find icu/source/data/unidata relative to U_TOPSRCDIR 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if defined(U_TOPSRCDIR) 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcpy(unidataPath, U_TOPSRCDIR U_FILE_SEP_STRING "data" U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING); 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcat(unidataPath, filename); 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru input=T_FileStream_open(unidataPath, "rb"); 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(input!=NULL) { 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return input; 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcpy(unidataPath, U_TOPSRCDIR U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING); 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcat(unidataPath, filename); 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru input=T_FileStream_open(unidataPath, "rb"); 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(input!=NULL) { 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return input; 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1456d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru dataerrln("Failed to open %s", filename); 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test the conformance of Normalizer to 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid NormalizerConformanceTest::TestConformance() { 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TestConformance(openNormalizationTestFile("NormalizationTest.txt"), 0); 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid NormalizerConformanceTest::TestConformance32() { 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TestConformance(openNormalizationTestFile("NormalizationTest-3.2.0.txt"), UNORM_UNICODE_3_2); 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid NormalizerConformanceTest::TestConformance(FileStream *input, int32_t options) { 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru enum { BUF_SIZE = 1024 }; 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char lineBuf[BUF_SIZE]; 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString fields[FIELD_COUNT]; 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t passCount = 0; 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t failCount = 0; 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(input==NULL) { 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // UnicodeSet for all code points that are not mentioned in NormalizationTest.txt 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet other(0, 0x10ffff); 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t count, countMoreCases = sizeof(moreCases)/sizeof(moreCases[0]); 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (count = 1;;++count) { 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!T_FileStream_eof(input)) { 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru T_FileStream_readLine(input, lineBuf, (int32_t)sizeof(lineBuf)); 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // once NormalizationTest.txt is finished, use moreCases[] 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(count > countMoreCases) { 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru count = 0; 185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(count == countMoreCases) { 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // all done 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_strcpy(lineBuf, moreCases[count]); 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (lineBuf[0] == 0 || lineBuf[0] == '\n' || lineBuf[0] == '\r') continue; 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Expect 5 columns of this format: 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 1E0C;1E0C;0044 0323;1E0C;0044 0323; # <comments> 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Parse out the comment. 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (lineBuf[0] == '#') continue; 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Read separator lines starting with '@' 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (lineBuf[0] == '@') { 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln(lineBuf); 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Parse out the fields 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!hexsplit(lineBuf, ';', fields, FIELD_COUNT)) { 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"Unable to parse line " + count); 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; // Syntax error 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Remove a single code point from the "other" UnicodeSet 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(fields[0].length()==fields[0].moveIndex32(0, 1)) { 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=fields[0].char32At(0); 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(0xac20<=c && c<=0xd73f && quick) { 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // not an exhaustive test run: skip most Hangul syllables 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(c==0xac20) { 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru other.remove(0xac20, 0xd73f); 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru other.remove(c); 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (checkConformance(fields, lineBuf, options, status)) { 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++passCount; 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++failCount; 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(status == U_FILE_ACCESS_ERROR) { 22950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("Something is wrong with the normalizer, skipping the rest of the test."); 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((count % 1000) == 0) { 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("Line %d", count); 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru T_FileStream_close(input); 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Test that all characters that are not mentioned 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * as single code points in column 1 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * do not change under any normalization. 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // remove U+ffff because that is the end-of-iteration sentinel value 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru other.remove(0xffff); 248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(c=0; c<=0x10ffff; quick ? c+=113 : ++c) { 250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(0x30000<=c && c<0xe0000) { 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c=0xe0000; 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!other.contains(c)) { 254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fields[0]=fields[1]=fields[2]=fields[3]=fields[4].setTo(c); 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sprintf(lineBuf, "not mentioned code point U+%04lx", (long)c); 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (checkConformance(fields, lineBuf, options, status)) { 261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++passCount; 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++failCount; 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(status == U_FILE_ACCESS_ERROR) { 26550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("Something is wrong with the normalizer, skipping the rest of the test.: %s", u_errorName(status)); 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((c % 0x1000) == 0) { 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln("Code point U+%04lx", c); 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (failCount != 0) { 27550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln((UnicodeString)"Total: " + failCount + " lines/code points failed, " + 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru passCount + " lines/code points passed"); 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru logln((UnicodeString)"Total: " + passCount + " lines/code points passed"); 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Verify the conformance of the given line of the Unicode 284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * normalization (UTR 15) test suite file. For each line, 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * there are five columns, corresponding to field[0]..field[4]. 286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The following invariants must be true for all conformant implementations 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * c2 == NFC(c1) == NFC(c2) == NFC(c3) 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * c3 == NFD(c1) == NFD(c2) == NFD(c3) 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5) 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5) 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param field the 5 columns 294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param line the source line from the test suite file 295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return true if the test passes 296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool NormalizerConformanceTest::checkConformance(const UnicodeString* field, 298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *line, 299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t options, 300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) { 301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool pass = TRUE, result; 302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //UErrorCode status = U_ZERO_ERROR; 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString out, fcd; 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t fieldNum; 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<FIELD_COUNT; ++i) { 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fieldNum = i+1; 308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i<3) { 309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer::normalize(field[i], UNORM_NFC, options, out, status); 31050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 31150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("Error running normalize UNORM_NFC: %s", u_errorName(status)); 31250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 31350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pass &= assertEqual("C", field[i], out, field[1], "c2!=C(c", fieldNum); 31450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho iterativeNorm(field[i], UNORM_NFC, options, out, +1); 31550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pass &= assertEqual("C(+1)", field[i], out, field[1], "c2!=C(c", fieldNum); 31650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho iterativeNorm(field[i], UNORM_NFC, options, out, -1); 31750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pass &= assertEqual("C(-1)", field[i], out, field[1], "c2!=C(c", fieldNum); 31850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer::normalize(field[i], UNORM_NFD, options, out, status); 32150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 32250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("Error running normalize UNORM_NFD: %s", u_errorName(status)); 32350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 32450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pass &= assertEqual("D", field[i], out, field[2], "c3!=D(c", fieldNum); 32550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho iterativeNorm(field[i], UNORM_NFD, options, out, +1); 32650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pass &= assertEqual("D(+1)", field[i], out, field[2], "c3!=D(c", fieldNum); 32750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho iterativeNorm(field[i], UNORM_NFD, options, out, -1); 32850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pass &= assertEqual("D(-1)", field[i], out, field[2], "c3!=D(c", fieldNum); 32950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer::normalize(field[i], UNORM_NFKC, options, out, status); 33250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 33350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("Error running normalize UNORM_NFKC: %s", u_errorName(status)); 33450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 33550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pass &= assertEqual("KC", field[i], out, field[3], "c4!=KC(c", fieldNum); 33650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho iterativeNorm(field[i], UNORM_NFKC, options, out, +1); 33750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pass &= assertEqual("KC(+1)", field[i], out, field[3], "c4!=KC(c", fieldNum); 33850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho iterativeNorm(field[i], UNORM_NFKC, options, out, -1); 33950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pass &= assertEqual("KC(-1)", field[i], out, field[3], "c4!=KC(c", fieldNum); 34050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer::normalize(field[i], UNORM_NFKD, options, out, status); 34350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if (U_FAILURE(status)) { 34450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("Error running normalize UNORM_NFKD: %s", u_errorName(status)); 34550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } else { 34650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pass &= assertEqual("KD", field[i], out, field[4], "c5!=KD(c", fieldNum); 34750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho iterativeNorm(field[i], UNORM_NFKD, options, out, +1); 34850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pass &= assertEqual("KD(+1)", field[i], out, field[4], "c5!=KD(c", fieldNum); 34950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho iterativeNorm(field[i], UNORM_NFKD, options, out, -1); 35050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho pass &= assertEqual("KD(-1)", field[i], out, field[4], "c5!=KD(c", fieldNum); 35150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru compare(field[1],field[2]); 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru compare(field[0],field[1]); 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test quick checks 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(UNORM_NO == Normalizer::quickCheck(field[1], UNORM_NFC, options, status)) { 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Normalizer error: quickCheck(NFC(s), UNORM_NFC) is UNORM_NO"); 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pass = FALSE; 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(UNORM_NO == Normalizer::quickCheck(field[2], UNORM_NFD, options, status)) { 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Normalizer error: quickCheck(NFD(s), UNORM_NFD) is UNORM_NO"); 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pass = FALSE; 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(UNORM_NO == Normalizer::quickCheck(field[3], UNORM_NFKC, options, status)) { 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Normalizer error: quickCheck(NFKC(s), UNORM_NFKC) is UNORM_NO"); 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pass = FALSE; 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(UNORM_NO == Normalizer::quickCheck(field[4], UNORM_NFKD, options, status)) { 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Normalizer error: quickCheck(NFKD(s), UNORM_NFKD) is UNORM_NO"); 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pass = FALSE; 371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // branch on options==0 for better code coverage 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(options==0) { 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = Normalizer::isNormalized(field[1], UNORM_NFC, status); 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = Normalizer::isNormalized(field[1], UNORM_NFC, options, status); 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!result) { 38050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("Normalizer error: isNormalized(NFC(s), UNORM_NFC) is FALSE"); 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pass = FALSE; 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(field[0]!=field[1] && Normalizer::isNormalized(field[0], UNORM_NFC, options, status)) { 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Normalizer error: isNormalized(s, UNORM_NFC) is TRUE"); 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pass = FALSE; 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(!Normalizer::isNormalized(field[3], UNORM_NFKC, options, status)) { 38850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("Normalizer error: isNormalized(NFKC(s), UNORM_NFKC) is FALSE"); 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pass = FALSE; 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(field[0]!=field[3] && Normalizer::isNormalized(field[0], UNORM_NFKC, options, status)) { 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Normalizer error: isNormalized(s, UNORM_NFKC) is TRUE"); 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pass = FALSE; 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test FCD quick check and "makeFCD" 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer::normalize(field[0], UNORM_FCD, options, fcd, status); 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(UNORM_NO == Normalizer::quickCheck(fcd, UNORM_FCD, options, status)) { 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Normalizer error: quickCheck(FCD(s), UNORM_FCD) is UNORM_NO"); 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pass = FALSE; 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(UNORM_NO == Normalizer::quickCheck(field[2], UNORM_FCD, options, status)) { 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Normalizer error: quickCheck(NFD(s), UNORM_FCD) is UNORM_NO"); 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pass = FALSE; 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(UNORM_NO == Normalizer::quickCheck(field[4], UNORM_FCD, options, status)) { 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Normalizer error: quickCheck(NFKD(s), UNORM_FCD) is UNORM_NO"); 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pass = FALSE; 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Normalizer::normalize(fcd, UNORM_NFD, options, out, status); 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(out != field[2]) { 41350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("Normalizer error: NFD(FCD(s))!=NFD(s)"); 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pass = FALSE; 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 41850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("Normalizer::normalize returned error status: %s", u_errorName(status)); 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pass = FALSE; 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(field[0]!=field[2]) { 423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // two strings that are canonically equivalent must test 424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // equal under a canonical caseless match 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // see UAX #21 Case Mappings and Jitterbug 2021 and 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Unicode Technical Committee meeting consensus 92-C31 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t rc; 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status=U_ZERO_ERROR; 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rc=Normalizer::compare(field[0], field[2], (options<<UNORM_COMPARE_NORM_OPTIONS_SHIFT)|U_COMPARE_IGNORE_CASE, status); 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(status)) { 43250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("Normalizer::compare(case-insensitive) sets %s", u_errorName(status)); 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pass=FALSE; 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if(rc!=0) { 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Normalizer::compare(original, NFD, case-insensitive) returned %d instead of 0 for equal", rc); 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pass=FALSE; 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!pass) { 44150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("FAIL: %s", line); 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return pass; 444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Do a normalization using the iterative API in the given direction. 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param dir either +1 or -1 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid NormalizerConformanceTest::iterativeNorm(const UnicodeString& str, 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UNormalizationMode mode, int32_t options, 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& result, 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int8_t dir) { 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru normalizer.setText(str, status); 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru normalizer.setMode(mode); 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru normalizer.setOption(-1, 0); // reset all options 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru normalizer.setOption(options, 1); // set desired options 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.truncate(0); 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 ch; 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (dir > 0) { 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (ch = normalizer.first(); ch != Normalizer::DONE; 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = normalizer.next()) { 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.append(ch); 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (ch = normalizer.last(); ch != Normalizer::DONE; 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ch = normalizer.previous()) { 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.insert(0, ch); 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param op name of normalization form, e.g., "KC" 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param s string being normalized 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param got value received 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param exp expected value 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param msg description of this test 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param return true if got == exp 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool NormalizerConformanceTest::assertEqual(const char *op, 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& s, 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& got, 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& exp, 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *msg, 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t field) 491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (exp == got) 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *sChars, *gotChars, *expChars; 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString sPretty(prettify(s)); 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString gotPretty(prettify(got)); 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString expPretty(prettify(exp)); 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sChars = new char[sPretty.length() + 1]; 501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru gotChars = new char[gotPretty.length() + 1]; 502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expChars = new char[expPretty.length() + 1]; 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sPretty.extract(0, sPretty.length(), sChars, sPretty.length() + 1); 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sChars[sPretty.length()] = 0; 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru gotPretty.extract(0, gotPretty.length(), gotChars, gotPretty.length() + 1); 507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru gotChars[gotPretty.length()] = 0; 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expPretty.extract(0, expPretty.length(), expChars, expPretty.length() + 1); 509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expChars[expPretty.length()] = 0; 510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln(" %s%d)%s(%s)=%s, exp. %s", msg, field, op, sChars, gotChars, expChars); 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete []sChars; 514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete []gotChars; 515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete []expChars; 516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Split a string into pieces based on the given delimiter 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * character. Then, parse the resultant fields from hex into 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * characters. That is, "0040 0400;0C00;0899" -> new String[] { 523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * "\u0040\u0400", "\u0C00", "\u0899" }. The output is assumed to 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * be of the proper length already, and exactly output.length 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * fields are parsed. If there are too few an exception is 526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * thrown. If there are too many the extras are ignored. 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return FALSE upon failure 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool NormalizerConformanceTest::hexsplit(const char *s, char delimiter, 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString output[], int32_t outputLength) { 532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *t = s; 533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *end = NULL; 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<outputLength; ++i) { 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // skip whitespace 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(*t == ' ' || *t == '\t') { 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++t; 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // read a sequence of code points 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru output[i].remove(); 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(;;) { 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = (UChar32)uprv_strtoul(t, &end, 16); 546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if( (char *)t == end || 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (uint32_t)c > 0x10ffff || 549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (*end != ' ' && *end != '\t' && *end != delimiter) 550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln(UnicodeString("Bad field ", "") + (i + 1) + " in " + UnicodeString(s, "")); 552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru output[i].append(c); 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = (const char *)end; 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // skip whitespace 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while(*t == ' ' || *t == '\t') { 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++t; 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(*t == delimiter) { 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++t; 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(*t == 0) { 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if((i + 1) == outputLength) { 570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln(UnicodeString("Missing field(s) in ", "") + s + " only " + (i + 1) + " out of " + outputLength); 573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Specific tests for debugging. These are generally failures taken from 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// the conformance file, but culled out to make debugging easier. 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid NormalizerConformanceTest::TestCase6(void) { 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _testOneLine("0385;0385;00A8 0301;0020 0308 0301;0020 0308 0301;"); 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid NormalizerConformanceTest::_testOneLine(const char *line) { 589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString fields[FIELD_COUNT]; 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!hexsplit(line, ';', fields, FIELD_COUNT)) { 592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln((UnicodeString)"Unable to parse line " + line); 593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru checkConformance(fields, line, 0, status); 595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_NORMALIZATION */ 599