1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/******************************************************************** 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * COPYRIGHT: 38393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * Copyright (c) 1997-2013, International Business Machines Corporation and 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * others. All Rights Reserved. 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ********************************************************************/ 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h" 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h" 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uniset.h" 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/putil.h" 118393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius#include "unicode/uscript.h" 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h" 1350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#include "hash.h" 14b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "patternprops.h" 1527f654740f2a26ad62a5c155af9199af9e69b889claireho#include "normalizer2impl.h" 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uparse.h" 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucdtest.h" 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 19c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof(array[0])) 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 2150294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic const char *ignorePropNames[]={ 2250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "FC_NFKC", 2350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "NFD_QC", 2450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "NFC_QC", 2550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "NFKD_QC", 2650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "NFKC_QC", 2750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "Expands_On_NFD", 2850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "Expands_On_NFC", 2950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "Expands_On_NFKD", 3050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "Expands_On_NFKC", 3150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "NFKC_CF" 3250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho}; 3350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeTest::UnicodeTest() 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 3650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode errorCode=U_ZERO_ERROR; 3750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho unknownPropertyNames=new U_NAMESPACE_QUALIFIER Hashtable(errorCode); 3850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(errorCode)) { 3950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete unknownPropertyNames; 4050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho unknownPropertyNames=NULL; 4150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 4250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // Ignore some property names altogether. 4350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(int32_t i=0; i<LENGTHOF(ignorePropNames); ++i) { 4450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho unknownPropertyNames->puti(UnicodeString(ignorePropNames[i], -1, US_INV), 1, errorCode); 4550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeTest::~UnicodeTest() 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 5050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho delete unknownPropertyNames; 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 55b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(exec) { 56b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho logln("TestSuite UnicodeTest: "); 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 58b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho TESTCASE_AUTO_BEGIN; 59b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho TESTCASE_AUTO(TestAdditionalProperties); 60b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho TESTCASE_AUTO(TestBinaryValues); 61b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho TESTCASE_AUTO(TestConsistency); 62b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho TESTCASE_AUTO(TestPatternProperties); 638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius TESTCASE_AUTO(TestScriptMetadata); 6459d709d503bab6e2b61931737e662dd293b40578ccornelius TESTCASE_AUTO(TestBidiPairedBracketType); 65b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho TESTCASE_AUTO_END; 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//==================================================== 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// private data used by the tests 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//==================================================== 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// test DerivedCoreProperties.txt ------------------------------------------- 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// copied from genprops.c 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerugetTokenIndex(const char *const tokens[], int32_t countTokens, const char *s) { 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *t, *z; 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i, j; 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s=u_skipWhitespace(s); 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(i=0; i<countTokens; ++i) { 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t=tokens[i]; 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(t!=NULL) { 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(j=0;; ++j) { 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(t[j]!=0) { 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(s[j]!=t[j]) { 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru z=u_skipWhitespace(s+j); 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(*z==';' || *z==0) { 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return i; 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const char *const 10450294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoderivedPropsNames[]={ 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "Math", 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "Alphabetic", 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "Lowercase", 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "Uppercase", 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ID_Start", 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "ID_Continue", 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "XID_Start", 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "XID_Continue", 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "Default_Ignorable_Code_Point", 11450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "Full_Composition_Exclusion", 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "Grapheme_Extend", 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "Grapheme_Link", /* Unicode 5 moves this property here from PropList.txt */ 11750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "Grapheme_Base", 11850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "Cased", 11950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "Case_Ignorable", 12050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "Changes_When_Lowercased", 12150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "Changes_When_Uppercased", 12250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "Changes_When_Titlecased", 12350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "Changes_When_Casefolded", 12450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "Changes_When_Casemapped", 12550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho "Changes_When_NFKC_Casefolded" 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UProperty 12950294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoderivedPropsIndex[]={ 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCHAR_MATH, 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCHAR_ALPHABETIC, 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCHAR_LOWERCASE, 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCHAR_UPPERCASE, 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCHAR_ID_START, 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCHAR_ID_CONTINUE, 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCHAR_XID_START, 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCHAR_XID_CONTINUE, 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCHAR_DEFAULT_IGNORABLE_CODE_POINT, 13950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UCHAR_FULL_COMPOSITION_EXCLUSION, 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCHAR_GRAPHEME_EXTEND, 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCHAR_GRAPHEME_LINK, 14250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UCHAR_GRAPHEME_BASE, 14350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UCHAR_CASED, 14450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UCHAR_CASE_IGNORABLE, 14550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UCHAR_CHANGES_WHEN_LOWERCASED, 14650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UCHAR_CHANGES_WHEN_UPPERCASED, 14750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UCHAR_CHANGES_WHEN_TITLECASED, 14850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UCHAR_CHANGES_WHEN_CASEFOLDED, 14950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UCHAR_CHANGES_WHEN_CASEMAPPED, 15050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 15350294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehostatic int32_t numErrors[LENGTHOF(derivedPropsIndex)]={ 0 }; 15450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 15550294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoenum { MAX_ERRORS=50 }; 15650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC void U_CALLCONV 15850294ead5e5d23f5bbfed76e00e6b510bd41eee1clairehoderivedPropsLineFn(void *context, 15950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho char *fields[][2], int32_t /* fieldCount */, 16050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode *pErrorCode) 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeTest *me=(UnicodeTest *)context; 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t start, end; 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode); 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(*pErrorCode)) { 16850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho me->errln("UnicodeTest: syntax error in DerivedCoreProperties.txt or DerivedNormalizationProps.txt field 0 at %s\n", fields[0][0]); 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* parse derived binary property name, ignore unknown names */ 17350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho i=getTokenIndex(derivedPropsNames, LENGTHOF(derivedPropsNames), fields[1][0]); 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(i<0) { 17550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UnicodeString propName(fields[1][0], (int32_t)(fields[1][1]-fields[1][0])); 17650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho propName.trim(); 17750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(me->unknownPropertyNames->find(propName)==NULL) { 17850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho UErrorCode errorCode=U_ZERO_ERROR; 17950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho me->unknownPropertyNames->puti(propName, 1, errorCode); 18050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho me->errln("UnicodeTest warning: unknown property name '%s' in DerivedCoreProperties.txt or DerivedNormalizationProps.txt\n", fields[1][0]); 18150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 18550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho me->derivedProps[i].add(start, end); 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid UnicodeTest::TestAdditionalProperties() { 18950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_NORMALIZATION 19050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho // test DerivedCoreProperties.txt and DerivedNormalizationProps.txt 19150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(LENGTHOF(derivedProps)<LENGTHOF(derivedPropsNames)) { 19250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("error: UnicodeTest::derivedProps[] too short, need at least %d UnicodeSets\n", 19350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho LENGTHOF(derivedPropsNames)); 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 19650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(LENGTHOF(derivedPropsIndex)!=LENGTHOF(derivedPropsNames)) { 19750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("error in ucdtest.cpp: LENGTHOF(derivedPropsIndex)!=LENGTHOF(derivedPropsNames)\n"); 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char newPath[256]; 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char backupPath[256]; 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *fields[2][2]; 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Look inside ICU_DATA first */ 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcpy(newPath, pathToDataDirectory()); 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcat(newPath, "unidata" U_FILE_SEP_STRING "DerivedCoreProperties.txt"); 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // As a fallback, try to guess where the source data was located 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // at the time ICU was built, and look there. 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# ifdef U_TOPSRCDIR 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcpy(backupPath, U_TOPSRCDIR U_FILE_SEP_STRING "data"); 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# else 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcpy(backupPath, loadTestData(errorCode)); 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data"); 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru# endif 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcat(backupPath, U_FILE_SEP_STRING); 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru strcat(backupPath, "unidata" U_FILE_SEP_STRING "DerivedCoreProperties.txt"); 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 22150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho char *path=newPath; 22250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_parseDelimitedFile(newPath, ';', fields, 2, derivedPropsLineFn, this, &errorCode); 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(errorCode==U_FILE_ACCESS_ERROR) { 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 22650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho path=backupPath; 22750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_parseDelimitedFile(backupPath, ';', fields, 2, derivedPropsLineFn, this, &errorCode); 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("error parsing DerivedCoreProperties.txt: %s\n", u_errorName(errorCode)); 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 23350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho char *basename=path+strlen(path)-strlen("DerivedCoreProperties.txt"); 23450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho strcpy(basename, "DerivedNormalizationProps.txt"); 23550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho u_parseDelimitedFile(path, ';', fields, 2, derivedPropsLineFn, this, &errorCode); 23650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(U_FAILURE(errorCode)) { 23750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("error parsing DerivedNormalizationProps.txt: %s\n", u_errorName(errorCode)); 23850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho return; 23950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho } 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // now we have all derived core properties in the UnicodeSets 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // run them all through the API 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t rangeCount, range; 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t i; 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 start, end; 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test all TRUE properties 24850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(i=0; i<LENGTHOF(derivedPropsNames); ++i) { 24950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho rangeCount=derivedProps[i].getRangeCount(); 25050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) { 25150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho start=derivedProps[i].getRangeStart(range); 25250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho end=derivedProps[i].getRangeEnd(range); 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(; start<=end; ++start) { 25450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(!u_hasBinaryProperty(start, derivedPropsIndex[i])) { 25550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==FALSE is wrong", start, derivedPropsNames[i]); 25650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(++numErrors[i]>=MAX_ERRORS) { 25750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho dataerrln("Too many errors, moving to the next test"); 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // invert all properties 26650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(i=0; i<LENGTHOF(derivedPropsNames); ++i) { 26750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho derivedProps[i].complement(); 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test all FALSE properties 27150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(i=0; i<LENGTHOF(derivedPropsNames); ++i) { 27250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho rangeCount=derivedProps[i].getRangeCount(); 27350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) { 27450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho start=derivedProps[i].getRangeStart(range); 27550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho end=derivedProps[i].getRangeEnd(range); 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for(; start<=end; ++start) { 27750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(u_hasBinaryProperty(start, derivedPropsIndex[i])) { 27850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==TRUE is wrong\n", start, derivedPropsNames[i]); 27950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho if(++numErrors[i]>=MAX_ERRORS) { 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errln("Too many errors, moving to the next test"); 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 28750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif /* !UCONFIG_NO_NORMALIZATION */ 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 289c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 290c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruvoid UnicodeTest::TestBinaryValues() { 291c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru /* 292c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Unicode 5.1 explicitly defines binary property value aliases. 293c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru * Verify that they are all recognized. 294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru */ 295c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 296c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet alpha(UNICODE_STRING_SIMPLE("[:Alphabetic:]"), errorCode); 297c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 2986d5deb12725f146643d443090dfa11b206df528aJean-Baptiste Queru dataerrln("UnicodeSet([:Alphabetic:]) failed - %s", u_errorName(errorCode)); 299c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 300c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 301c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru 302c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru static const char *const falseValues[]={ "N", "No", "F", "False" }; 303c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru static const char *const trueValues[]={ "Y", "Yes", "T", "True" }; 304c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru int32_t i; 305c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(i=0; i<LENGTHOF(falseValues); ++i) { 306c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString pattern=UNICODE_STRING_SIMPLE("[:Alphabetic=:]"); 307c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru pattern.insert(pattern.length()-2, UnicodeString(falseValues[i], -1, US_INV)); 308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 309c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet set(pattern, errorCode); 310c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 311c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru errln("UnicodeSet([:Alphabetic=%s:]) failed - %s\n", falseValues[i], u_errorName(errorCode)); 312c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 313c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 314c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru set.complement(); 315c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(set!=alpha) { 316c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru errln("UnicodeSet([:Alphabetic=%s:]).complement()!=UnicodeSet([:Alphabetic:])\n", falseValues[i]); 317c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 318c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 319c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru for(i=0; i<LENGTHOF(trueValues); ++i) { 320c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString pattern=UNICODE_STRING_SIMPLE("[:Alphabetic=:]"); 321c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru pattern.insert(pattern.length()-2, UnicodeString(trueValues[i], -1, US_INV)); 322c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru errorCode=U_ZERO_ERROR; 323c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeSet set(pattern, errorCode); 324c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 325c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru errln("UnicodeSet([:Alphabetic=%s:]) failed - %s\n", trueValues[i], u_errorName(errorCode)); 326c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru continue; 327c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 328c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if(set!=alpha) { 329c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru errln("UnicodeSet([:Alphabetic=%s:])!=UnicodeSet([:Alphabetic:])\n", trueValues[i]); 330c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 331c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 332c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru} 33327f654740f2a26ad62a5c155af9199af9e69b889claireho 33427f654740f2a26ad62a5c155af9199af9e69b889clairehovoid UnicodeTest::TestConsistency() { 33527f654740f2a26ad62a5c155af9199af9e69b889claireho#if !UCONFIG_NO_NORMALIZATION 33627f654740f2a26ad62a5c155af9199af9e69b889claireho /* 33727f654740f2a26ad62a5c155af9199af9e69b889claireho * Test for an example that getCanonStartSet() delivers 33827f654740f2a26ad62a5c155af9199af9e69b889claireho * all characters that compose from the input one, 33927f654740f2a26ad62a5c155af9199af9e69b889claireho * even in multiple steps. 34027f654740f2a26ad62a5c155af9199af9e69b889claireho * For example, the set for "I" (0049) should contain both 34127f654740f2a26ad62a5c155af9199af9e69b889claireho * I-diaeresis (00CF) and I-diaeresis-acute (1E2E). 34227f654740f2a26ad62a5c155af9199af9e69b889claireho * In general, the set for the middle such character should be a subset 34327f654740f2a26ad62a5c155af9199af9e69b889claireho * of the set for the first. 34427f654740f2a26ad62a5c155af9199af9e69b889claireho */ 34527f654740f2a26ad62a5c155af9199af9e69b889claireho IcuTestErrorCode errorCode(*this, "TestConsistency"); 346103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius const Normalizer2 *nfd=Normalizer2::getNFDInstance(errorCode); 34727f654740f2a26ad62a5c155af9199af9e69b889claireho const Normalizer2Impl *nfcImpl=Normalizer2Factory::getNFCImpl(errorCode); 348b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(!nfcImpl->ensureCanonIterData(errorCode) || errorCode.isFailure()) { 34927f654740f2a26ad62a5c155af9199af9e69b889claireho dataerrln("Normalizer2::getInstance(NFD) or Normalizer2Factory::getNFCImpl() failed - %s\n", 35027f654740f2a26ad62a5c155af9199af9e69b889claireho errorCode.errorName()); 35127f654740f2a26ad62a5c155af9199af9e69b889claireho errorCode.reset(); 35227f654740f2a26ad62a5c155af9199af9e69b889claireho return; 35327f654740f2a26ad62a5c155af9199af9e69b889claireho } 35427f654740f2a26ad62a5c155af9199af9e69b889claireho 35527f654740f2a26ad62a5c155af9199af9e69b889claireho UnicodeSet set1, set2; 35627f654740f2a26ad62a5c155af9199af9e69b889claireho if (nfcImpl->getCanonStartSet(0x49, set1)) { 35727f654740f2a26ad62a5c155af9199af9e69b889claireho /* enumerate all characters that are plausible to be latin letters */ 35827f654740f2a26ad62a5c155af9199af9e69b889claireho for(UChar start=0xa0; start<0x2000; ++start) { 35927f654740f2a26ad62a5c155af9199af9e69b889claireho UnicodeString decomp=nfd->normalize(UnicodeString(start), errorCode); 36027f654740f2a26ad62a5c155af9199af9e69b889claireho if(decomp.length()>1 && decomp[0]==0x49) { 36127f654740f2a26ad62a5c155af9199af9e69b889claireho set2.add(start); 36227f654740f2a26ad62a5c155af9199af9e69b889claireho } 36327f654740f2a26ad62a5c155af9199af9e69b889claireho } 36427f654740f2a26ad62a5c155af9199af9e69b889claireho 36527f654740f2a26ad62a5c155af9199af9e69b889claireho if (set1!=set2) { 36627f654740f2a26ad62a5c155af9199af9e69b889claireho errln("[canon start set of 0049] != [all c with canon decomp with 0049]"); 36727f654740f2a26ad62a5c155af9199af9e69b889claireho } 36827f654740f2a26ad62a5c155af9199af9e69b889claireho // This was available in cucdtst.c but the test had to move to intltest 36927f654740f2a26ad62a5c155af9199af9e69b889claireho // because the new internal normalization functions are in C++. 37027f654740f2a26ad62a5c155af9199af9e69b889claireho //compareUSets(set1, set2, 37127f654740f2a26ad62a5c155af9199af9e69b889claireho // "[canon start set of 0049]", "[all c with canon decomp with 0049]", 37227f654740f2a26ad62a5c155af9199af9e69b889claireho // TRUE); 37327f654740f2a26ad62a5c155af9199af9e69b889claireho } else { 37427f654740f2a26ad62a5c155af9199af9e69b889claireho errln("NFC.getCanonStartSet() returned FALSE"); 37527f654740f2a26ad62a5c155af9199af9e69b889claireho } 37627f654740f2a26ad62a5c155af9199af9e69b889claireho#endif 37727f654740f2a26ad62a5c155af9199af9e69b889claireho} 378b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 379b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/** 380b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Test various implementations of Pattern_Syntax & Pattern_White_Space. 381b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 382b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid UnicodeTest::TestPatternProperties() { 383b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho IcuTestErrorCode errorCode(*this, "TestPatternProperties()"); 384b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeSet syn_pp; 385b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeSet syn_prop(UNICODE_STRING_SIMPLE("[:Pattern_Syntax:]"), errorCode); 386b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeSet syn_list( 387b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho "[!-/\\:-@\\[-\\^`\\{-~" 388b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho "\\u00A1-\\u00A7\\u00A9\\u00AB\\u00AC\\u00AE\\u00B0\\u00B1\\u00B6\\u00BB\\u00BF\\u00D7\\u00F7" 389b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho "\\u2010-\\u2027\\u2030-\\u203E\\u2041-\\u2053\\u2055-\\u205E\\u2190-\\u245F\\u2500-\\u2775" 390b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho "\\u2794-\\u2BFF\\u2E00-\\u2E7F\\u3001-\\u3003\\u3008-\\u3020\\u3030\\uFD3E\\uFD3F\\uFE45\\uFE46]", errorCode); 391b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeSet ws_pp; 392b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeSet ws_prop(UNICODE_STRING_SIMPLE("[:Pattern_White_Space:]"), errorCode); 393b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeSet ws_list(UNICODE_STRING_SIMPLE("[\\u0009-\\u000D\\ \\u0085\\u200E\\u200F\\u2028\\u2029]"), errorCode); 394b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeSet syn_ws_pp; 395b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UnicodeSet syn_ws_prop(syn_prop); 396b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho syn_ws_prop.addAll(ws_prop); 397b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for(UChar32 c=0; c<=0xffff; ++c) { 398b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(PatternProps::isSyntax(c)) { 399b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho syn_pp.add(c); 400b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 401b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(PatternProps::isWhiteSpace(c)) { 402b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ws_pp.add(c); 403b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 404b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(PatternProps::isSyntaxOrWhiteSpace(c)) { 405b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho syn_ws_pp.add(c); 406b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 407b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 408b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho compareUSets(syn_pp, syn_prop, 409b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho "PatternProps.isSyntax()", "[:Pattern_Syntax:]", TRUE); 410b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho compareUSets(syn_pp, syn_list, 411b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho "PatternProps.isSyntax()", "[Pattern_Syntax ranges]", TRUE); 412b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho compareUSets(ws_pp, ws_prop, 413b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho "PatternProps.isWhiteSpace()", "[:Pattern_White_Space:]", TRUE); 414b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho compareUSets(ws_pp, ws_list, 415b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho "PatternProps.isWhiteSpace()", "[Pattern_White_Space ranges]", TRUE); 416b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho compareUSets(syn_ws_pp, syn_ws_prop, 417b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho "PatternProps.isSyntaxOrWhiteSpace()", 418b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho "[[:Pattern_Syntax:][:Pattern_White_Space:]]", TRUE); 419b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 420b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 421b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// So far only minimal port of Java & cucdtst.c compareUSets(). 422b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool 423b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUnicodeTest::compareUSets(const UnicodeSet &a, const UnicodeSet &b, 424b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const char *a_name, const char *b_name, 425b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UBool diffIsError) { 426b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UBool same= a==b; 427b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(!same && diffIsError) { 428b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho errln("Sets are different: %s vs. %s\n", a_name, b_name); 429b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 430b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return same; 431b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 4328393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 4338393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusnamespace { 4348393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 4358393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius/** 4368393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius * Maps a special script code to the most common script of its encoded characters. 4378393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius */ 4388393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig CorneliusUScriptCode getCharScript(UScriptCode script) { 4398393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius switch(script) { 4408393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius case USCRIPT_SIMPLIFIED_HAN: 4418393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius case USCRIPT_TRADITIONAL_HAN: 4428393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return USCRIPT_HAN; 4438393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius case USCRIPT_JAPANESE: 4448393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return USCRIPT_HIRAGANA; 4458393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius case USCRIPT_KOREAN: 4468393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return USCRIPT_HANGUL; 4478393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius default: 4488393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius return script; 4498393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 4508393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 4518393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 4528393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} // namespace 4538393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 4548393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Corneliusvoid UnicodeTest::TestScriptMetadata() { 4558393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius IcuTestErrorCode errorCode(*this, "TestScriptMetadata()"); 4568393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius UnicodeSet rtl("[[:bc=R:][:bc=AL:]-[:Cn:]-[:sc=Common:]]", errorCode); 4578393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // So far, sample characters are uppercase. 4588393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // Georgian is special. 4598393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius UnicodeSet cased("[[:Lu:]-[:sc=Common:]-[:sc=Geor:]]", errorCode); 4608393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius for(int32_t sci = 0; sci < USCRIPT_CODE_LIMIT; ++sci) { 4618393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius UScriptCode sc = (UScriptCode)sci; 4628393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // Run the test with -v to see which script has failures: 4638393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius // .../intltest$ make && ./intltest utility/UnicodeTest/TestScriptMetadata -v | grep -C 3 FAIL 4648393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius logln(uscript_getShortName(sc)); 4658393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius UScriptUsage usage = uscript_getUsage(sc); 4668393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius UnicodeString sample = uscript_getSampleUnicodeString(sc); 4678393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius UnicodeSet scriptSet; 4688393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius scriptSet.applyIntPropertyValue(UCHAR_SCRIPT, sc, errorCode); 4698393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if(usage == USCRIPT_USAGE_NOT_ENCODED) { 4708393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius assertTrue("not encoded, no sample", sample.isEmpty()); 4718393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius assertFalse("not encoded, not RTL", uscript_isRightToLeft(sc)); 4728393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius assertFalse("not encoded, not LB letters", uscript_breaksBetweenLetters(sc)); 4738393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius assertFalse("not encoded, not cased", uscript_isCased(sc)); 4748393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius assertTrue("not encoded, no characters", scriptSet.isEmpty()); 4758393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } else { 4768393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius assertFalse("encoded, has a sample character", sample.isEmpty()); 4778393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius UChar32 firstChar = sample.char32At(0); 4788393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius UScriptCode charScript = getCharScript(sc); 4798393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius assertEquals("script(sample(script))", 48059d709d503bab6e2b61931737e662dd293b40578ccornelius (int32_t)charScript, (int32_t)uscript_getScript(firstChar, errorCode)); 48159d709d503bab6e2b61931737e662dd293b40578ccornelius assertEquals("RTL vs. set", (UBool)rtl.contains(firstChar), (UBool)uscript_isRightToLeft(sc)); 48259d709d503bab6e2b61931737e662dd293b40578ccornelius assertEquals("cased vs. set", (UBool)cased.contains(firstChar), (UBool)uscript_isCased(sc)); 48359d709d503bab6e2b61931737e662dd293b40578ccornelius assertEquals("encoded, has characters", (UBool)(sc == charScript), (UBool)(!scriptSet.isEmpty())); 4848393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if(uscript_isRightToLeft(sc)) { 4858393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius rtl.removeAll(scriptSet); 4868393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 4878393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius if(uscript_isCased(sc)) { 4888393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius cased.removeAll(scriptSet); 4898393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 4908393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 4918393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius } 4928393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius UnicodeString pattern; 4938393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius assertEquals("no remaining RTL characters", 4948393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius UnicodeString("[]"), rtl.toPattern(pattern)); 4958393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius assertEquals("no remaining cased characters", 4968393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius UnicodeString("[]"), cased.toPattern(pattern)); 4978393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius 4988393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius assertTrue("Hani breaks between letters", uscript_breaksBetweenLetters(USCRIPT_HAN)); 4998393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius assertTrue("Thai breaks between letters", uscript_breaksBetweenLetters(USCRIPT_THAI)); 5008393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius assertFalse("Latn does not break between letters", uscript_breaksBetweenLetters(USCRIPT_LATIN)); 5018393335b955da7340c9f19b1b4b2d6c0c2c04be7Craig Cornelius} 50259d709d503bab6e2b61931737e662dd293b40578ccornelius 50359d709d503bab6e2b61931737e662dd293b40578ccorneliusvoid UnicodeTest::TestBidiPairedBracketType() { 50459d709d503bab6e2b61931737e662dd293b40578ccornelius // BidiBrackets-6.3.0.txt says: 50559d709d503bab6e2b61931737e662dd293b40578ccornelius // 50659d709d503bab6e2b61931737e662dd293b40578ccornelius // The set of code points listed in this file was originally derived 50759d709d503bab6e2b61931737e662dd293b40578ccornelius // using the character properties General_Category (gc), Bidi_Class (bc), 50859d709d503bab6e2b61931737e662dd293b40578ccornelius // Bidi_Mirrored (Bidi_M), and Bidi_Mirroring_Glyph (bmg), as follows: 50959d709d503bab6e2b61931737e662dd293b40578ccornelius // two characters, A and B, form a pair if A has gc=Ps and B has gc=Pe, 51059d709d503bab6e2b61931737e662dd293b40578ccornelius // both have bc=ON and Bidi_M=Y, and bmg of A is B. Bidi_Paired_Bracket 51159d709d503bab6e2b61931737e662dd293b40578ccornelius // maps A to B and vice versa, and their Bidi_Paired_Bracket_Type 51259d709d503bab6e2b61931737e662dd293b40578ccornelius // property values are Open and Close, respectively. 51359d709d503bab6e2b61931737e662dd293b40578ccornelius IcuTestErrorCode errorCode(*this, "TestBidiPairedBracketType()"); 51459d709d503bab6e2b61931737e662dd293b40578ccornelius UnicodeSet bpt("[:^bpt=n:]", errorCode); 51559d709d503bab6e2b61931737e662dd293b40578ccornelius assertTrue("bpt!=None is not empty", !bpt.isEmpty()); 51659d709d503bab6e2b61931737e662dd293b40578ccornelius // The following should always be true. 51759d709d503bab6e2b61931737e662dd293b40578ccornelius UnicodeSet mirrored("[:Bidi_M:]", errorCode); 51859d709d503bab6e2b61931737e662dd293b40578ccornelius UnicodeSet other_neutral("[:bc=ON:]", errorCode); 51959d709d503bab6e2b61931737e662dd293b40578ccornelius assertTrue("bpt!=None is a subset of Bidi_M", mirrored.containsAll(bpt)); 52059d709d503bab6e2b61931737e662dd293b40578ccornelius assertTrue("bpt!=None is a subset of bc=ON", other_neutral.containsAll(bpt)); 52159d709d503bab6e2b61931737e662dd293b40578ccornelius // The following are true at least initially in Unicode 6.3. 52259d709d503bab6e2b61931737e662dd293b40578ccornelius UnicodeSet bpt_open("[:bpt=o:]", errorCode); 52359d709d503bab6e2b61931737e662dd293b40578ccornelius UnicodeSet bpt_close("[:bpt=c:]", errorCode); 52459d709d503bab6e2b61931737e662dd293b40578ccornelius UnicodeSet ps("[:Ps:]", errorCode); 52559d709d503bab6e2b61931737e662dd293b40578ccornelius UnicodeSet pe("[:Pe:]", errorCode); 52659d709d503bab6e2b61931737e662dd293b40578ccornelius assertTrue("bpt=Open is a subset of Ps", ps.containsAll(bpt_open)); 52759d709d503bab6e2b61931737e662dd293b40578ccornelius assertTrue("bpt=Close is a subset of Pe", pe.containsAll(bpt_close)); 52859d709d503bab6e2b61931737e662dd293b40578ccornelius} 529