1/*
2 * Copyright (c) 2013 Yandex LLC. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 *     * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *     * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 *     * Neither the name of Yandex LLC nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include "config.h"
32#include "platform/text/UnicodeUtilities.h"
33
34#include "wtf/Vector.h"
35#include "wtf/text/WTFString.h"
36#include "wtf/unicode/CharacterNames.h"
37#include <gtest/gtest.h>
38#include <unicode/uchar.h>
39
40using namespace blink;
41
42namespace {
43
44static const UChar32 kMaxLatinCharCount = 256;
45
46static bool isTestFirstAndLastCharsInCategoryFailed = false;
47UBool U_CALLCONV testFirstAndLastCharsInCategory(const void *context, UChar32 start, UChar32 limit, UCharCategory type)
48{
49    if (start >= kMaxLatinCharCount
50        && U_MASK(type) & (U_GC_S_MASK | U_GC_P_MASK | U_GC_Z_MASK | U_GC_CF_MASK)
51        && (!isSeparator(start) || !isSeparator(limit - 1))) {
52        isTestFirstAndLastCharsInCategoryFailed = true;
53
54        // Break enumeration process
55        return 0;
56    }
57
58    return 1;
59}
60
61TEST(WebCoreUnicodeUnit, Separators)
62{
63    static const bool latinSeparatorTable[kMaxLatinCharCount] = {
64        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
65        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
66        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // space ! " # $ % & ' ( ) * + , - . /
67        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, //                         : ; < = > ?
68        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //   @
69        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, //                         [ \ ] ^ _
70        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //   `
71        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, //                           { | } ~
72        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
73        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
74        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
75        1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
76        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
77        0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
78        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
79        0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0
80    };
81
82    for (UChar32 character = 0; character < kMaxLatinCharCount; ++character) {
83        EXPECT_EQ(isSeparator(character), latinSeparatorTable[character]);
84    }
85
86    isTestFirstAndLastCharsInCategoryFailed = false;
87    u_enumCharTypes(&testFirstAndLastCharsInCategory, 0);
88    EXPECT_FALSE(isTestFirstAndLastCharsInCategoryFailed);
89}
90
91TEST(WebCoreUnicodeUnit, KanaLetters)
92{
93    // Non Kana symbols
94    for (UChar character = 0; character < 0x3041; ++character)
95        EXPECT_FALSE(isKanaLetter(character));
96
97    // Hiragana letters.
98    for (UChar character = 0x3041; character <= 0x3096; ++character)
99        EXPECT_TRUE(isKanaLetter(character));
100
101    // Katakana letters.
102    for (UChar character = 0x30A1; character <= 0x30FA; ++character)
103        EXPECT_TRUE(isKanaLetter(character));
104}
105
106TEST(WebCoreUnicodeUnit, ContainsKanaLetters)
107{
108    // Non Kana symbols
109    String nonKanaString;
110    for (UChar character = 0; character < 0x3041; ++character)
111        nonKanaString.append(character);
112    EXPECT_FALSE(containsKanaLetters(nonKanaString));
113
114    // Hiragana letters.
115    for (UChar character = 0x3041; character <= 0x3096; ++character) {
116        String str(nonKanaString);
117        str.append(character);
118        EXPECT_TRUE(containsKanaLetters(str));
119    }
120
121    // Katakana letters.
122    for (UChar character = 0x30A1; character <= 0x30FA; ++character) {
123        String str(nonKanaString);
124        str.append(character);
125        EXPECT_TRUE(containsKanaLetters(str));
126    }
127}
128
129TEST(WebCoreUnicodeUnit, FoldQuoteMarkOrSoftHyphenTest)
130{
131    const UChar charactersToFold[] = {
132        hebrewPunctuationGershayim, leftDoubleQuotationMark, rightDoubleQuotationMark,
133        hebrewPunctuationGeresh, leftSingleQuotationMark, rightSingleQuotationMark,
134        softHyphen
135    };
136
137    String stringToFold(charactersToFold, WTF_ARRAY_LENGTH(charactersToFold));
138    Vector<UChar> buffer;
139    stringToFold.appendTo(buffer);
140
141    foldQuoteMarksAndSoftHyphens(stringToFold);
142
143    const String foldedString("\"\"\"\'\'\'\0", WTF_ARRAY_LENGTH(charactersToFold));
144    EXPECT_EQ(stringToFold, foldedString);
145
146    foldQuoteMarksAndSoftHyphens(buffer.data(), buffer.size());
147    EXPECT_EQ(String(buffer), foldedString);
148}
149
150TEST(WebCoreUnicodeUnit, OnlyKanaLettersEqualityTest)
151{
152    const UChar nonKanaString1[] = { 'a', 'b', 'c', 'd' };
153    const UChar nonKanaString2[] = { 'e', 'f', 'g' };
154
155    // Check that non-Kana letters will be skipped.
156    EXPECT_TRUE(checkOnlyKanaLettersInStrings(
157        nonKanaString1, WTF_ARRAY_LENGTH(nonKanaString1),
158        nonKanaString2, WTF_ARRAY_LENGTH(nonKanaString2)));
159
160    const UChar kanaString[] = { 'e', 'f', 'g', 0x3041 };
161    EXPECT_FALSE(checkOnlyKanaLettersInStrings(
162        kanaString, WTF_ARRAY_LENGTH(kanaString),
163        nonKanaString2, WTF_ARRAY_LENGTH(nonKanaString2)));
164
165    // Compare with self.
166    EXPECT_TRUE(checkOnlyKanaLettersInStrings(
167        kanaString, WTF_ARRAY_LENGTH(kanaString),
168        kanaString, WTF_ARRAY_LENGTH(kanaString)));
169
170    UChar voicedKanaString1[] = { 0x3042, 0x3099 };
171    UChar voicedKanaString2[] = { 0x3042, 0x309A };
172
173    // Comparing strings with different sound marks should fail.
174    EXPECT_FALSE(checkOnlyKanaLettersInStrings(
175        voicedKanaString1, WTF_ARRAY_LENGTH(voicedKanaString1),
176        voicedKanaString2, WTF_ARRAY_LENGTH(voicedKanaString2)));
177
178    // Now strings will be the same.
179    voicedKanaString2[1] = 0x3099;
180    EXPECT_TRUE(checkOnlyKanaLettersInStrings(
181        voicedKanaString1, WTF_ARRAY_LENGTH(voicedKanaString1),
182        voicedKanaString2, WTF_ARRAY_LENGTH(voicedKanaString2)));
183
184    voicedKanaString2[0] = 0x3043;
185    EXPECT_FALSE(checkOnlyKanaLettersInStrings(
186        voicedKanaString1, WTF_ARRAY_LENGTH(voicedKanaString1),
187        voicedKanaString2, WTF_ARRAY_LENGTH(voicedKanaString2)));
188}
189
190TEST(WebCoreUnicodeUnit, StringsWithKanaLettersTest)
191{
192    const UChar nonKanaString1[] = { 'a', 'b', 'c' };
193    const UChar nonKanaString2[] = { 'a', 'b', 'c' };
194
195    // Check that non-Kana letters will be compared.
196    EXPECT_TRUE(checkKanaStringsEqual(
197        nonKanaString1, WTF_ARRAY_LENGTH(nonKanaString1),
198        nonKanaString2, WTF_ARRAY_LENGTH(nonKanaString2)));
199
200    const UChar kanaString[] = { 'a', 'b', 'c', 0x3041 };
201    EXPECT_FALSE(checkKanaStringsEqual(
202        kanaString, WTF_ARRAY_LENGTH(kanaString),
203        nonKanaString2, WTF_ARRAY_LENGTH(nonKanaString2)));
204
205    // Compare with self.
206    EXPECT_TRUE(checkKanaStringsEqual(
207        kanaString, WTF_ARRAY_LENGTH(kanaString),
208        kanaString, WTF_ARRAY_LENGTH(kanaString)));
209
210    const UChar kanaString2[] = { 'x', 'y', 'z', 0x3041 };
211    // Comparing strings with different non-Kana letters should fail.
212    EXPECT_FALSE(checkKanaStringsEqual(
213        kanaString, WTF_ARRAY_LENGTH(kanaString),
214        kanaString2, WTF_ARRAY_LENGTH(kanaString2)));
215
216    const UChar kanaString3[] = { 'a', 'b', 'c', 0x3042, 0x3099, 'm', 'n', 'o' };
217    // Check that non-Kana letters after Kana letters will be compared.
218    EXPECT_TRUE(checkKanaStringsEqual(
219        kanaString3, WTF_ARRAY_LENGTH(kanaString3),
220        kanaString3, WTF_ARRAY_LENGTH(kanaString3)));
221
222    const UChar kanaString4[] = { 'a', 'b', 'c', 0x3042, 0x3099, 'm', 'n', 'o', 'p' };
223    // And now comparing should fail.
224    EXPECT_FALSE(checkKanaStringsEqual(
225        kanaString3, WTF_ARRAY_LENGTH(kanaString3),
226        kanaString4, WTF_ARRAY_LENGTH(kanaString4)));
227
228    UChar voicedKanaString1[] = { 0x3042, 0x3099 };
229    UChar voicedKanaString2[] = { 0x3042, 0x309A };
230
231    // Comparing strings with different sound marks should fail.
232    EXPECT_FALSE(checkKanaStringsEqual(
233        voicedKanaString1, WTF_ARRAY_LENGTH(voicedKanaString1),
234        voicedKanaString2, WTF_ARRAY_LENGTH(voicedKanaString2)));
235
236    // Now strings will be the same.
237    voicedKanaString2[1] = 0x3099;
238    EXPECT_TRUE(checkKanaStringsEqual(
239        voicedKanaString1, WTF_ARRAY_LENGTH(voicedKanaString1),
240        voicedKanaString2, WTF_ARRAY_LENGTH(voicedKanaString2)));
241
242    voicedKanaString2[0] = 0x3043;
243    EXPECT_FALSE(checkKanaStringsEqual(
244        voicedKanaString1, WTF_ARRAY_LENGTH(voicedKanaString1),
245        voicedKanaString2, WTF_ARRAY_LENGTH(voicedKanaString2)));
246}
247
248} // namespace
249