1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru************************************************************************ 385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho* Copyright (c) 1997-2009, International Business Machines 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru************************************************************************ 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_NORMALIZATION 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uchar.h" 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/normlzr.h" 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uniset.h" 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/putil.h" 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unormimp.h" 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cstring.h" 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "filestrm.h" 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "normconf.h" 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdio.h> 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define ARRAY_LENGTH(array) (sizeof(array) / sizeof(array[0])) 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define CASE(id,test) case id: \ 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru name = #test; \ 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (exec) { \ 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru logln(#test "---"); \ 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru logln((UnicodeString)""); \ 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru test(); \ 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } \ 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid NormalizerConformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/) { 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch (index) { 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CASE(0, TestConformance); 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru CASE(1, TestConformance32); 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // CASE(2, TestCase6); 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: name = ""; break; 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define FIELD_COUNT 5 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruNormalizerConformanceTest::NormalizerConformanceTest() : 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru normalizer(UnicodeString(), UNORM_NFC) {} 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruNormalizerConformanceTest::~NormalizerConformanceTest() {} 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// more interesting conformance test cases, not in the unicode.org NormalizationTest.txt 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const char *moreCases[]={ 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Markus 2001aug30 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "0061 0332 0308;00E4 0332;0061 0332 0308;00E4 0332;0061 0332 0308; # Markus 0", 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Markus 2001oct26 - test edge case for iteration: U+0f73.cc==0 but decomposition.lead.cc==129 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "0061 0301 0F73;00E1 0F71 0F72;0061 0F71 0F72 0301;00E1 0F71 0F72;0061 0F71 0F72 0301; # Markus 1" 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid NormalizerConformanceTest::compare(const UnicodeString& s1, const UnicodeString& s2){ 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode status=U_ZERO_ERROR; 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // TODO: Re-enable this tests after UTC fixes UAX 21 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(s1.indexOf((UChar32)0x0345)>=0)return; 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(Normalizer::compare(s1,s2,U_FOLD_CASE_DEFAULT,status)!=0){ 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errln("Normalizer::compare() failed for s1: " + prettify(s1) + " s2: " +prettify(s2)); 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruFileStream * 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruNormalizerConformanceTest::openNormalizationTestFile(const char *filename) { 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char unidataPath[2000]; 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *folder; 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FileStream *input; 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode errorCode; 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // look inside ICU_DATA first 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru folder=pathToDataDirectory(); 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(folder!=NULL) { 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru strcpy(unidataPath, folder); 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru strcat(unidataPath, "unidata" U_FILE_SEP_STRING); 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru strcat(unidataPath, filename); 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru input=T_FileStream_open(unidataPath, "rb"); 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(input!=NULL) { 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return input; 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // find icu/source/data/unidata relative to the test data 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru folder=loadTestData(errorCode); 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(errorCode)) { 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru strcpy(unidataPath, folder); 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru strcat(unidataPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_FILE_SEP_STRING "data" U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING); 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru strcat(unidataPath, filename); 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru input=T_FileStream_open(unidataPath, "rb"); 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(input!=NULL) { 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return input; 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // look in icu/source/test/testdata/out/build 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru folder=loadTestData(errorCode); 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(errorCode)) { 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru strcpy(unidataPath, folder); 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru strcat(unidataPath, U_FILE_SEP_STRING); 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru strcat(unidataPath, filename); 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru input=T_FileStream_open(unidataPath, "rb"); 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(input!=NULL) { 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return input; 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // look in icu/source/test/testdata 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru folder=loadTestData(errorCode); 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(errorCode)) { 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru strcpy(unidataPath, folder); 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru strcat(unidataPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING); 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru strcat(unidataPath, filename); 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru input=T_FileStream_open(unidataPath, "rb"); 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(input!=NULL) { 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return input; 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // find icu/source/data/unidata relative to U_TOPSRCDIR 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if defined(U_TOPSRCDIR) 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru strcpy(unidataPath, U_TOPSRCDIR U_FILE_SEP_STRING "data" U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING); 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru strcat(unidataPath, filename); 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru input=T_FileStream_open(unidataPath, "rb"); 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(input!=NULL) { 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return input; 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru strcpy(unidataPath, U_TOPSRCDIR U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING); 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru strcat(unidataPath, filename); 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru input=T_FileStream_open(unidataPath, "rb"); 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(input!=NULL) { 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return input; 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 14485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho dataerrln("Failed to open %s", filename); 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Test the conformance of Normalizer to 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid NormalizerConformanceTest::TestConformance() { 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TestConformance(openNormalizationTestFile("NormalizationTest.txt"), 0); 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid NormalizerConformanceTest::TestConformance32() { 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TestConformance(openNormalizationTestFile("NormalizationTest-3.2.0.txt"), UNORM_UNICODE_3_2); 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid NormalizerConformanceTest::TestConformance(FileStream *input, int32_t options) { 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru enum { BUF_SIZE = 1024 }; 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char lineBuf[BUF_SIZE]; 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString fields[FIELD_COUNT]; 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t passCount = 0; 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t failCount = 0; 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(input==NULL) { 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // UnicodeSet for all code points that are not mentioned in NormalizationTest.txt 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeSet other(0, 0x10ffff); 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t count, countMoreCases = sizeof(moreCases)/sizeof(moreCases[0]); 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (count = 1;;++count) { 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!T_FileStream_eof(input)) { 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru T_FileStream_readLine(input, lineBuf, (int32_t)sizeof(lineBuf)); 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // once NormalizationTest.txt is finished, use moreCases[] 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(count > countMoreCases) { 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count = 0; 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(count == countMoreCases) { 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // all done 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_strcpy(lineBuf, moreCases[count]); 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (lineBuf[0] == 0 || lineBuf[0] == '\n' || lineBuf[0] == '\r') continue; 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Expect 5 columns of this format: 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 1E0C;1E0C;0044 0323;1E0C;0044 0323; # <comments> 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Parse out the comment. 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (lineBuf[0] == '#') continue; 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Read separator lines starting with '@' 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (lineBuf[0] == '@') { 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru logln(lineBuf); 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Parse out the fields 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!hexsplit(lineBuf, ';', fields, FIELD_COUNT)) { 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errln((UnicodeString)"Unable to parse line " + count); 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; // Syntax error 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Remove a single code point from the "other" UnicodeSet 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(fields[0].length()==fields[0].moveIndex32(0, 1)) { 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=fields[0].char32At(0); 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(0xac20<=c && c<=0xd73f && quick) { 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // not an exhaustive test run: skip most Hangul syllables 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c==0xac20) { 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru other.remove(0xac20, 0xd73f); 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru other.remove(c); 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (checkConformance(fields, lineBuf, options, status)) { 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++passCount; 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++failCount; 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(status == U_FILE_ACCESS_ERROR) { 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errln("Something is wrong with the normalizer, skipping the rest of the test."); 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if ((count % 1000) == 0) { 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru logln("Line %d", count); 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru T_FileStream_close(input); 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Test that all characters that are not mentioned 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * as single code points in column 1 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * do not change under any normalization. 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // remove U+ffff because that is the end-of-iteration sentinel value 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru other.remove(0xffff); 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(c=0; c<=0x10ffff; quick ? c+=113 : ++c) { 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(0x30000<=c && c<0xe0000) { 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c=0xe0000; 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!other.contains(c)) { 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fields[0]=fields[1]=fields[2]=fields[3]=fields[4].setTo(c); 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sprintf(lineBuf, "not mentioned code point U+%04lx", (long)c); 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (checkConformance(fields, lineBuf, options, status)) { 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++passCount; 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++failCount; 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(status == U_FILE_ACCESS_ERROR) { 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errln("Something is wrong with the normalizer, skipping the rest of the test."); 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if ((c % 0x1000) == 0) { 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru logln("Code point U+%04lx", c); 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (failCount != 0) { 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errln((UnicodeString)"Total: " + failCount + " lines/code points failed, " + 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru passCount + " lines/code points passed"); 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru logln((UnicodeString)"Total: " + passCount + " lines/code points passed"); 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Verify the conformance of the given line of the Unicode 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * normalization (UTR 15) test suite file. For each line, 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * there are five columns, corresponding to field[0]..field[4]. 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The following invariants must be true for all conformant implementations 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * c2 == NFC(c1) == NFC(c2) == NFC(c3) 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * c3 == NFD(c1) == NFD(c2) == NFD(c3) 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5) 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5) 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param field the 5 columns 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param line the source line from the test suite file 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return true if the test passes 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool NormalizerConformanceTest::checkConformance(const UnicodeString* field, 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *line, 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t options, 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode &status) { 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool pass = TRUE, result; 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru //UErrorCode status = U_ZERO_ERROR; 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString out, fcd; 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t fieldNum; 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (int32_t i=0; i<FIELD_COUNT; ++i) { 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fieldNum = i+1; 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (i<3) { 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Normalizer::normalize(field[i], UNORM_NFC, options, out, status); 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass &= assertEqual("C", field[i], out, field[1], "c2!=C(c", fieldNum); 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iterativeNorm(field[i], UNORM_NFC, options, out, +1); 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass &= assertEqual("C(+1)", field[i], out, field[1], "c2!=C(c", fieldNum); 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iterativeNorm(field[i], UNORM_NFC, options, out, -1); 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass &= assertEqual("C(-1)", field[i], out, field[1], "c2!=C(c", fieldNum); 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Normalizer::normalize(field[i], UNORM_NFD, options, out, status); 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass &= assertEqual("D", field[i], out, field[2], "c3!=D(c", fieldNum); 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iterativeNorm(field[i], UNORM_NFD, options, out, +1); 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass &= assertEqual("D(+1)", field[i], out, field[2], "c3!=D(c", fieldNum); 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iterativeNorm(field[i], UNORM_NFD, options, out, -1); 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass &= assertEqual("D(-1)", field[i], out, field[2], "c3!=D(c", fieldNum); 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Normalizer::normalize(field[i], UNORM_NFKC, options, out, status); 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass &= assertEqual("KC", field[i], out, field[3], "c4!=KC(c", fieldNum); 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iterativeNorm(field[i], UNORM_NFKC, options, out, +1); 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass &= assertEqual("KC(+1)", field[i], out, field[3], "c4!=KC(c", fieldNum); 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iterativeNorm(field[i], UNORM_NFKC, options, out, -1); 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass &= assertEqual("KC(-1)", field[i], out, field[3], "c4!=KC(c", fieldNum); 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Normalizer::normalize(field[i], UNORM_NFKD, options, out, status); 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass &= assertEqual("KD", field[i], out, field[4], "c5!=KD(c", fieldNum); 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iterativeNorm(field[i], UNORM_NFKD, options, out, +1); 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass &= assertEqual("KD(+1)", field[i], out, field[4], "c5!=KD(c", fieldNum); 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru iterativeNorm(field[i], UNORM_NFKD, options, out, -1); 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass &= assertEqual("KD(-1)", field[i], out, field[4], "c5!=KD(c", fieldNum); 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru compare(field[1],field[2]); 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru compare(field[0],field[1]); 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // test quick checks 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UNORM_NO == Normalizer::quickCheck(field[1], UNORM_NFC, options, status)) { 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errln("Normalizer error: quickCheck(NFC(s), UNORM_NFC) is UNORM_NO"); 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass = FALSE; 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UNORM_NO == Normalizer::quickCheck(field[2], UNORM_NFD, options, status)) { 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errln("Normalizer error: quickCheck(NFD(s), UNORM_NFD) is UNORM_NO"); 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass = FALSE; 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UNORM_NO == Normalizer::quickCheck(field[3], UNORM_NFKC, options, status)) { 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errln("Normalizer error: quickCheck(NFKC(s), UNORM_NFKC) is UNORM_NO"); 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass = FALSE; 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UNORM_NO == Normalizer::quickCheck(field[4], UNORM_NFKD, options, status)) { 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errln("Normalizer error: quickCheck(NFKD(s), UNORM_NFKD) is UNORM_NO"); 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass = FALSE; 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // branch on options==0 for better code coverage 357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(options==0) { 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result = Normalizer::isNormalized(field[1], UNORM_NFC, status); 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result = Normalizer::isNormalized(field[1], UNORM_NFC, options, status); 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!result) { 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errln("Normalizer error: isNormalized(NFC(s), UNORM_NFC) is FALSE"); 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass = FALSE; 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(field[0]!=field[1] && Normalizer::isNormalized(field[0], UNORM_NFC, options, status)) { 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errln("Normalizer error: isNormalized(s, UNORM_NFC) is TRUE"); 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass = FALSE; 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!Normalizer::isNormalized(field[3], UNORM_NFKC, options, status)) { 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errln("Normalizer error: isNormalized(NFKC(s), UNORM_NFKC) is FALSE"); 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass = FALSE; 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(field[0]!=field[3] && Normalizer::isNormalized(field[0], UNORM_NFKC, options, status)) { 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errln("Normalizer error: isNormalized(s, UNORM_NFKC) is TRUE"); 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass = FALSE; 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // test FCD quick check and "makeFCD" 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Normalizer::normalize(field[0], UNORM_FCD, options, fcd, status); 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UNORM_NO == Normalizer::quickCheck(fcd, UNORM_FCD, options, status)) { 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errln("Normalizer error: quickCheck(FCD(s), UNORM_FCD) is UNORM_NO"); 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass = FALSE; 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UNORM_NO == Normalizer::quickCheck(field[2], UNORM_FCD, options, status)) { 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errln("Normalizer error: quickCheck(NFD(s), UNORM_FCD) is UNORM_NO"); 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass = FALSE; 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(UNORM_NO == Normalizer::quickCheck(field[4], UNORM_FCD, options, status)) { 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errln("Normalizer error: quickCheck(NFKD(s), UNORM_FCD) is UNORM_NO"); 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass = FALSE; 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Normalizer::normalize(fcd, UNORM_NFD, options, out, status); 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(out != field[2]) { 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errln("Normalizer error: NFD(FCD(s))!=NFD(s)"); 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass = FALSE; 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(status)) { 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errln("Normalizer::normalize returned error status"); 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass = FALSE; 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(field[0]!=field[2]) { 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // two strings that are canonically equivalent must test 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // equal under a canonical caseless match 408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // see UAX #21 Case Mappings and Jitterbug 2021 and 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Unicode Technical Committee meeting consensus 92-C31 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t rc; 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru status=U_ZERO_ERROR; 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru rc=Normalizer::compare(field[0], field[2], (options<<UNORM_COMPARE_NORM_OPTIONS_SHIFT)|U_COMPARE_IGNORE_CASE, status); 414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(status)) { 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errln("Normalizer::compare(case-insensitive) sets %s", u_errorName(status)); 416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass=FALSE; 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(rc!=0) { 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errln("Normalizer::compare(original, NFD, case-insensitive) returned %d instead of 0 for equal", rc); 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pass=FALSE; 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!pass) { 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errln("FAIL: %s", line); 425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return pass; 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Do a normalization using the iterative API in the given direction. 431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param dir either +1 or -1 432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid NormalizerConformanceTest::iterativeNorm(const UnicodeString& str, 434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UNormalizationMode mode, int32_t options, 435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString& result, 436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t dir) { 437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru normalizer.setText(str, status); 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru normalizer.setMode(mode); 440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru normalizer.setOption(-1, 0); // reset all options 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru normalizer.setOption(options, 1); // set desired options 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result.truncate(0); 443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(status)) { 444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 ch; 447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (dir > 0) { 448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (ch = normalizer.first(); ch != Normalizer::DONE; 449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch = normalizer.next()) { 450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result.append(ch); 451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (ch = normalizer.last(); ch != Normalizer::DONE; 454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch = normalizer.previous()) { 455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result.insert(0, ch); 456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param op name of normalization form, e.g., "KC" 462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param s string being normalized 463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param got value received 464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param exp expected value 465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param msg description of this test 466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param return true if got == exp 467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool NormalizerConformanceTest::assertEqual(const char *op, 469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString& s, 470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString& got, 471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString& exp, 472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *msg, 473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t field) 474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (exp == got) 476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char *sChars, *gotChars, *expChars; 479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString sPretty(prettify(s)); 480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString gotPretty(prettify(got)); 481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString expPretty(prettify(exp)); 482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sChars = new char[sPretty.length() + 1]; 484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru gotChars = new char[gotPretty.length() + 1]; 485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru expChars = new char[expPretty.length() + 1]; 486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sPretty.extract(0, sPretty.length(), sChars, sPretty.length() + 1); 488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sChars[sPretty.length()] = 0; 489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru gotPretty.extract(0, gotPretty.length(), gotChars, gotPretty.length() + 1); 490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru gotChars[gotPretty.length()] = 0; 491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru expPretty.extract(0, expPretty.length(), expChars, expPretty.length() + 1); 492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru expChars[expPretty.length()] = 0; 493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errln(" %s%d)%s(%s)=%s, exp. %s", msg, field, op, sChars, gotChars, expChars); 495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete []sChars; 497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete []gotChars; 498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete []expChars; 499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Split a string into pieces based on the given delimiter 504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * character. Then, parse the resultant fields from hex into 505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * characters. That is, "0040 0400;0C00;0899" -> new String[] { 506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * "\u0040\u0400", "\u0C00", "\u0899" }. The output is assumed to 507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * be of the proper length already, and exactly output.length 508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * fields are parsed. If there are too few an exception is 509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * thrown. If there are too many the extras are ignored. 510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return FALSE upon failure 512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool NormalizerConformanceTest::hexsplit(const char *s, char delimiter, 514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString output[], int32_t outputLength) { 515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *t = s; 516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char *end = NULL; 517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c; 518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i; 519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (i=0; i<outputLength; ++i) { 520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // skip whitespace 521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(*t == ' ' || *t == '\t') { 522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++t; 523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // read a sequence of code points 526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru output[i].remove(); 527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(;;) { 528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c = (UChar32)uprv_strtoul(t, &end, 16); 529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( (char *)t == end || 531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (uint32_t)c > 0x10ffff || 532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (*end != ' ' && *end != '\t' && *end != delimiter) 533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ) { 534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errln(UnicodeString("Bad field ", "") + (i + 1) + " in " + UnicodeString(s, "")); 535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru output[i].append(c); 539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru t = (const char *)end; 541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // skip whitespace 543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(*t == ' ' || *t == '\t') { 544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++t; 545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(*t == delimiter) { 548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++t; 549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(*t == 0) { 552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((i + 1) == outputLength) { 553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errln(UnicodeString("Missing field(s) in ", "") + s + " only " + (i + 1) + " out of " + outputLength); 556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Specific tests for debugging. These are generally failures taken from 565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// the conformance file, but culled out to make debugging easier. 566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid NormalizerConformanceTest::TestCase6(void) { 568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _testOneLine("0385;0385;00A8 0301;0020 0308 0301;0020 0308 0301;"); 569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid NormalizerConformanceTest::_testOneLine(const char *line) { 572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString fields[FIELD_COUNT]; 574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!hexsplit(line, ';', fields, FIELD_COUNT)) { 575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru errln((UnicodeString)"Unable to parse line " + line); 576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru checkConformance(fields, line, 0, status); 578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_NORMALIZATION */ 582