164339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// Copyright (C) 2016 and later: Unicode, Inc. and others. 264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru************************************************************************ 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Copyright (c) 1997-2003, International Business Machines 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru************************************************************************ 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef _NORMCONF 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define _NORMCONF 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_NORMALIZATION 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/normlzr.h" 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "intltest.h" 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef struct _FileStream FileStream; 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass NormalizerConformanceTest : public IntlTest { 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Normalizer normalizer; 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru public: 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru NormalizerConformanceTest(); 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru virtual ~NormalizerConformanceTest(); 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void runIndexedTest(int32_t index, UBool exec, const char* &name, char* par=NULL); 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Test the conformance of Normalizer to 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void TestConformance(); 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void TestConformance32(); 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void TestConformance(FileStream *input, int32_t options); 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Specific tests for debugging. These are generally failures taken from 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // the conformance file, but culled out to make debugging easier. 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void TestCase6(void); 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru private: 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FileStream *openNormalizationTestFile(const char *filename); 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Verify the conformance of the given line of the Unicode 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * normalization (UTR 15) test suite file. For each line, 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * there are five columns, corresponding to field[0]..field[4]. 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The following invariants must be true for all conformant implementations 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * c2 == NFC(c1) == NFC(c2) == NFC(c3) 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * c3 == NFD(c1) == NFD(c2) == NFD(c3) 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5) 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5) 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param field the 5 columns 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param line the source line from the test suite file 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return true if the test passes 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool checkConformance(const UnicodeString* field, 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *line, 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t options, 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode &status); 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void iterativeNorm(const UnicodeString& str, 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UNormalizationMode mode, int32_t options, 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString& result, 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t dir); 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param op name of normalization form, e.g., "KC" 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param s string being normalized 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param got value received 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param exp expected value 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param msg description of this test 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param return true if got == exp 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool assertEqual(const char *op, 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString& s, 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString& got, 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString& exp, 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char *msg, 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t field); 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Split a string into pieces based on the given delimiter 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * character. Then, parse the resultant fields from hex into 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * characters. That is, "0040 0400;0C00;0899" -> new String[] { 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * "\u0040\u0400", "\u0C00", "\u0899" }. The output is assumed to 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * be of the proper length already, and exactly output.length 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * fields are parsed. If there are too few an exception is 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * thrown. If there are too many the extras are ignored. 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param buf scratch buffer 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return FALSE upon failure 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool hexsplit(const char *s, char delimiter, 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString output[], int32_t outputLength); 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void _testOneLine(const char *line); 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void compare(const UnicodeString& s1,const UnicodeString& s2); 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_NORMALIZATION */ 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 108