1/*
2 * Copyright (C) 2013 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 *     * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *     * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 *     * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include "config.h"
32#include "platform/text/BidiResolver.h"
33
34#include "platform/text/BidiTestHarness.h"
35#include "platform/text/TextRunIterator.h"
36#include "wtf/OwnPtr.h"
37#include <fstream>
38#include <gtest/gtest.h>
39
40namespace {
41
42using namespace WTF;
43using namespace blink;
44
45TEST(BidiResolver, Basic)
46{
47    bool hasStrongDirectionality;
48    String value("foo");
49    TextRun run(value);
50    BidiResolver<TextRunIterator, BidiCharacterRun> bidiResolver;
51    bidiResolver.setStatus(BidiStatus(run.direction(), run.directionalOverride()));
52    bidiResolver.setPositionIgnoringNestedIsolates(TextRunIterator(&run, 0));
53    TextDirection direction = bidiResolver.determineParagraphDirectionality(&hasStrongDirectionality);
54    EXPECT_TRUE(hasStrongDirectionality);
55    EXPECT_EQ(LTR, direction);
56}
57
58TextDirection determineParagraphDirectionality(const TextRun& textRun, bool* hasStrongDirectionality = 0)
59{
60    BidiResolver<TextRunIterator, BidiCharacterRun> resolver;
61    resolver.setStatus(BidiStatus(LTR, false));
62    resolver.setPositionIgnoringNestedIsolates(TextRunIterator(&textRun, 0));
63    return resolver.determineParagraphDirectionality(hasStrongDirectionality);
64}
65
66struct TestData {
67    UChar text[3];
68    size_t length;
69    TextDirection expectedDirection;
70    bool expectedStrong;
71};
72
73void testDirectionality(const TestData& entry)
74{
75    bool hasStrongDirectionality;
76    String data(entry.text, entry.length);
77    TextRun run(data);
78    TextDirection direction = determineParagraphDirectionality(run, &hasStrongDirectionality);
79    EXPECT_EQ(entry.expectedStrong, hasStrongDirectionality);
80    EXPECT_EQ(entry.expectedDirection, direction);
81}
82
83TEST(BidiResolver, ParagraphDirectionSurrogates)
84{
85    const TestData testData[] = {
86        // Test strong RTL, non-BMP. (U+10858 Imperial Aramaic number one, strong RTL)
87        { { 0xD802, 0xDC58 }, 2, RTL, true },
88
89        // Test strong LTR, non-BMP. (U+1D15F Musical symbol quarter note, strong LTR)
90        { { 0xD834, 0xDD5F }, 2, LTR, true },
91
92        // Test broken surrogate: valid leading, invalid trail. (Lead of U+10858, space)
93        { { 0xD802, ' ' }, 2, LTR, false },
94
95        // Test broken surrogate: invalid leading. (Trail of U+10858, U+05D0 Hebrew Alef)
96        { { 0xDC58, 0x05D0 }, 2, RTL, true },
97
98        // Test broken surrogate: valid leading, invalid trail/valid lead, valid trail.
99        { { 0xD802, 0xD802, 0xDC58 }, 3, RTL, true },
100
101        // Test broken surrogate: valid leading, no trail (string too short). (Lead of U+10858)
102        { { 0xD802, 0xDC58 }, 1, LTR, false },
103
104        // Test broken surrogate: trail appearing before lead. (U+10858 units reversed)
105        { { 0xDC58, 0xD802 }, 2, LTR, false }
106    };
107    for (size_t i = 0; i < WTF_ARRAY_LENGTH(testData); ++i)
108        testDirectionality(testData[i]);
109}
110
111class BidiTestRunner {
112public:
113    BidiTestRunner()
114        : m_testsRun(0)
115        , m_testsSkipped(0)
116        , m_ignoredCharFailures(0)
117        , m_levelFailures(0)
118        , m_orderFailures(0)
119    {
120    }
121
122    void skipTestsWith(UChar codepoint)
123    {
124        m_skippedCodePoints.insert(codepoint);
125    }
126
127    void runTest(const std::basic_string<UChar>& input, const std::vector<int>& reorder,
128        const std::vector<int>& levels, bidi_test::ParagraphDirection,
129        const std::string& line, size_t lineNumber);
130
131    size_t m_testsRun;
132    size_t m_testsSkipped;
133    std::set<UChar> m_skippedCodePoints;
134    size_t m_ignoredCharFailures;
135    size_t m_levelFailures;
136    size_t m_orderFailures;
137};
138
139// Blink's UBA does not filter out control characters, etc. Maybe it should?
140// Instead it depends on later layers of Blink to simply ignore them.
141// This function helps us emulate that to be compatible with BidiTest.txt expectations.
142static bool isNonRenderedCodePoint(UChar c)
143{
144    // The tests also expect us to ignore soft-hyphen.
145    if (c == 0xAD)
146        return true;
147    // Control characters are not rendered:
148    return c >= 0x202A && c <= 0x202E;
149    // But it seems to expect LRI, etc. to be rendered!?
150}
151
// Builds a one-line description of two integer sequences for failure output,
// in the form "actual: a b ...  expected: x y ... ". Every element is followed
// by a single space.
std::string diffString(const std::vector<int>& actual, const std::vector<int>& expected)
{
    std::ostringstream out;

    out << "actual: ";
    for (std::vector<int>::const_iterator it = actual.begin(); it != actual.end(); ++it)
        out << *it << " ";

    out << " expected: ";
    for (std::vector<int>::const_iterator it = expected.begin(); it != expected.end(); ++it)
        out << *it << " ";

    return out.str();
}
162
163void BidiTestRunner::runTest(const std::basic_string<UChar>& input, const std::vector<int>& expectedOrder,
164    const std::vector<int>& expectedLevels, bidi_test::ParagraphDirection paragraphDirection,
165    const std::string& line, size_t lineNumber)
166{
167    if (!m_skippedCodePoints.empty()) {
168        for (size_t i = 0; i < input.size(); i++) {
169            if (m_skippedCodePoints.count(input[i])) {
170                m_testsSkipped++;
171                return;
172            }
173        }
174    }
175
176    m_testsRun++;
177
178    TextRun textRun(input.data(), input.size());
179    switch (paragraphDirection) {
180    case bidi_test::DirectionAutoLTR:
181        textRun.setDirection(determineParagraphDirectionality(textRun));
182        break;
183    case bidi_test::DirectionLTR:
184        textRun.setDirection(LTR);
185        break;
186    case bidi_test::DirectionRTL:
187        textRun.setDirection(RTL);
188        break;
189    }
190    BidiResolver<TextRunIterator, BidiCharacterRun> resolver;
191    resolver.setStatus(BidiStatus(textRun.direction(), textRun.directionalOverride()));
192    resolver.setPositionIgnoringNestedIsolates(TextRunIterator(&textRun, 0));
193
194    BidiRunList<BidiCharacterRun>& runs = resolver.runs();
195    resolver.createBidiRunsForLine(TextRunIterator(&textRun, textRun.length()));
196
197    std::ostringstream errorContext;
198    errorContext << ", line " << lineNumber << " \"" << line << "\"";
199    errorContext << " context: " << bidi_test::nameFromParagraphDirection(paragraphDirection);
200
201    std::vector<int> actualOrder;
202    std::vector<int> actualLevels;
203    actualLevels.assign(input.size(), -1);
204    BidiCharacterRun* run = runs.firstRun();
205    while (run) {
206        // Blink's UBA just makes runs, the actual ordering of the display of characters
207        // is handled later in our pipeline, so we fake it here:
208        bool reversed = run->reversed(false);
209        ASSERT(run->stop() >= run->start());
210        size_t length = run->stop() - run->start();
211        for (size_t i = 0; i < length; i++) {
212            int inputIndex = reversed ? run->stop() - i - 1 : run->start() + i;
213            if (!isNonRenderedCodePoint(input[inputIndex]))
214                actualOrder.push_back(inputIndex);
215            // BidiTest.txt gives expected level data in the order of the original input.
216            actualLevels[inputIndex] = run->level();
217        }
218        run = run->next();
219    }
220
221    if (expectedOrder.size() != actualOrder.size()) {
222        m_ignoredCharFailures++;
223        EXPECT_EQ(expectedOrder.size(), actualOrder.size()) << errorContext.str();
224    } else if (expectedOrder != actualOrder) {
225        m_orderFailures++;
226        printf("ORDER %s%s\n", diffString(actualOrder, expectedOrder).c_str(), errorContext.str().c_str());
227    }
228
229    if (expectedLevels.size() != actualLevels.size()) {
230        m_ignoredCharFailures++;
231        EXPECT_EQ(expectedLevels.size(), actualLevels.size()) << errorContext.str();
232    } else {
233        for (size_t i = 0; i < expectedLevels.size(); i++) {
234            // level == -1 means the level should be ignored.
235            if (expectedLevels[i] == actualLevels[i] || expectedLevels[i] == -1)
236                continue;
237
238            printf("LEVELS %s%s\n", diffString(actualLevels, expectedLevels).c_str(), errorContext.str().c_str());
239            m_levelFailures++;
240            break;
241        }
242    }
243    runs.deleteRuns();
244}
245
246
247TEST(BidiResolver, BidiTest_txt)
248{
249    BidiTestRunner runner;
250    // Blink's Unicode Bidi Algorithm (UBA) doesn't yet support the
251    // new isolate directives from Unicode 6.3:
252    // http://www.unicode.org/reports/tr9/#Explicit_Directional_Isolates
253    runner.skipTestsWith(0x2066); // LRI
254    runner.skipTestsWith(0x2067); // RLI
255    runner.skipTestsWith(0x2068); // FSI
256    runner.skipTestsWith(0x2069); // PDI
257
258    // This code wants to use PathService from base/path_service.h
259    // but we aren't allowed to depend on base/ directly from Blink yet.
260    // Alternatively we could use:
261    // blink::Platform::current()->unitTestSupport()->webKitRootDir()
262    // and a relative path, but that would require running inside
263    // webkit_unit_tests (to have a functioning Platform object).
264    // The file we want is:
265    // src/third_party/icu/source/test/testdata/BidiTest.txt
266    // but we don't have any good way to find it from this unittest.
267    // Just assume we're running this test manually for now. On the
268    // bots we just print a warning that we can't find the test file.
269    std::string bidiTestPath = "BidiTest.txt";
270    std::ifstream bidiTestFile(bidiTestPath.c_str());
271    if (!bidiTestFile.is_open()) {
272        printf("ERROR: Failed to open BidiTest.txt, cannot run tests.\n");
273        return;
274    }
275
276    bidi_test::Harness<BidiTestRunner> harness(runner);
277    harness.parse(bidiTestFile);
278    bidiTestFile.close();
279
280    if (runner.m_testsSkipped)
281        printf("WARNING: Skipped %zu tests.\n", runner.m_testsSkipped);
282    printf("Ran %zu tests: %zu level failures %zu order failures.\n",
283        runner.m_testsRun, runner.m_levelFailures, runner.m_orderFailures);
284
285    // The unittest harness only pays attention to GTest output, so we verify
286    // that the tests behaved as expected:
287    EXPECT_EQ(352098u, runner.m_testsRun);
288    EXPECT_EQ(418143u, runner.m_testsSkipped);
289    EXPECT_EQ(0u, runner.m_ignoredCharFailures);
290    EXPECT_EQ(44882u, runner.m_levelFailures);
291    EXPECT_EQ(19151u, runner.m_orderFailures);
292}
293
294}
295