164339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// Copyright (C) 2016 and later: Unicode, Inc. and others.
264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html
3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru************************************************************************
5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Copyright (c) 1997-2003, International Business Machines
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others.  All Rights Reserved.
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru************************************************************************
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifndef _NORMCONF
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define _NORMCONF
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_NORMALIZATION
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/normlzr.h"
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "intltest.h"
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef struct _FileStream FileStream;
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass NormalizerConformanceTest : public IntlTest {
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Normalizer normalizer;
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru public:
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    NormalizerConformanceTest();
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    virtual ~NormalizerConformanceTest();
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void runIndexedTest(int32_t index, UBool exec, const char* &name, char* par=NULL);
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Test the conformance of Normalizer to
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void TestConformance();
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void TestConformance32();
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void TestConformance(FileStream *input, int32_t options);
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Specific tests for debugging.  These are generally failures taken from
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // the conformance file, but culled out to make debugging easier.
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void TestCase6(void);
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru private:
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    FileStream *openNormalizationTestFile(const char *filename);
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Verify the conformance of the given line of the Unicode
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * normalization (UTR 15) test suite file.  For each line,
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * there are five columns, corresponding to field[0]..field[4].
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The following invariants must be true for all conformant implementations
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *  c2 == NFC(c1) == NFC(c2) == NFC(c3)
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *  c3 == NFD(c1) == NFD(c2) == NFD(c3)
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *  c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *  c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param field the 5 columns
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param line the source line from the test suite file
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return true if the test passes
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool checkConformance(const UnicodeString* field,
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           const char *line,
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           int32_t options,
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                           UErrorCode &status);
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void iterativeNorm(const UnicodeString& str,
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                       UNormalizationMode mode, int32_t options,
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                       UnicodeString& result,
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                       int8_t dir);
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param op name of normalization form, e.g., "KC"
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param s string being normalized
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param got value received
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param exp expected value
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param msg description of this test
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param return true if got == exp
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool assertEqual(const char *op,
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                      const UnicodeString& s,
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                      const UnicodeString& got,
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                      const UnicodeString& exp,
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                      const char *msg,
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                      int32_t field);
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Split a string into pieces based on the given delimiter
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * character.  Then, parse the resultant fields from hex into
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * characters.  That is, "0040 0400;0C00;0899" -> new String[] {
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * "\u0040\u0400", "\u0C00", "\u0899" }.  The output is assumed to
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * be of the proper length already, and exactly output.length
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * fields are parsed.  If there are too few an exception is
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * thrown.  If there are too many the extras are ignored.
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     *
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @param buf scratch buffer
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * @return FALSE upon failure
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool hexsplit(const char *s, char delimiter,
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   UnicodeString output[], int32_t outputLength);
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void _testOneLine(const char *line);
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void compare(const UnicodeString& s1,const UnicodeString& s2);
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_NORMALIZATION */
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
108