1/*
2 * Copyright (C) 2012 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
14 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
15 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
17 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23 * THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27
28#include "wtf/MathExtras.h"
29#include "wtf/text/CString.h"
30#include "wtf/text/WTFString.h"
31#include <gtest/gtest.h>
32#include <limits>
33
34namespace {
35
36TEST(WTF, StringCreationFromLiteral)
37{
38    String stringFromLiteral("Explicit construction syntax");
39    ASSERT_EQ(strlen("Explicit construction syntax"), stringFromLiteral.length());
40    ASSERT_TRUE(stringFromLiteral == "Explicit construction syntax");
41    ASSERT_TRUE(stringFromLiteral.is8Bit());
42    ASSERT_TRUE(String("Explicit construction syntax") == stringFromLiteral);
43}
44
45TEST(WTF, StringASCII)
46{
47    CString output;
48
49    // Null String.
50    output = String().ascii();
51    ASSERT_STREQ("", output.data());
52
53    // Empty String.
54    output = emptyString().ascii();
55    ASSERT_STREQ("", output.data());
56
57    // Regular String.
58    output = String("foobar").ascii();
59    ASSERT_STREQ("foobar", output.data());
60}
61
62static void testNumberToStringECMAScript(double number, const char* reference)
63{
64    CString numberString = String::numberToStringECMAScript(number).latin1();
65    ASSERT_STREQ(reference, numberString.data());
66}
67
68TEST(WTF, StringNumberToStringECMAScriptBoundaries)
69{
70    typedef std::numeric_limits<double> Limits;
71
72    // Infinity.
73    testNumberToStringECMAScript(Limits::infinity(), "Infinity");
74    testNumberToStringECMAScript(-Limits::infinity(), "-Infinity");
75
76    // NaN.
77    testNumberToStringECMAScript(-Limits::quiet_NaN(), "NaN");
78
79    // Zeros.
80    testNumberToStringECMAScript(0, "0");
81    testNumberToStringECMAScript(-0, "0");
82
83    // Min-Max.
84    testNumberToStringECMAScript(Limits::min(), "2.2250738585072014e-308");
85    testNumberToStringECMAScript(Limits::max(), "1.7976931348623157e+308");
86}
87
88TEST(WTF, StringNumberToStringECMAScriptRegularNumbers)
89{
90    // Pi.
91    testNumberToStringECMAScript(piDouble, "3.141592653589793");
92    testNumberToStringECMAScript(piFloat, "3.1415927410125732");
93    testNumberToStringECMAScript(piOverTwoDouble, "1.5707963267948966");
94    testNumberToStringECMAScript(piOverTwoFloat, "1.5707963705062866");
95    testNumberToStringECMAScript(piOverFourDouble, "0.7853981633974483");
96    testNumberToStringECMAScript(piOverFourFloat, "0.7853981852531433");
97
98    // e.
99    const double e = 2.71828182845904523536028747135266249775724709369995;
100    testNumberToStringECMAScript(e, "2.718281828459045");
101
102    // c, speed of light in m/s.
103    const double c = 299792458;
104    testNumberToStringECMAScript(c, "299792458");
105
106    // Golen ratio.
107    const double phi = 1.6180339887498948482;
108    testNumberToStringECMAScript(phi, "1.618033988749895");
109}
110
111TEST(WTF, StringReplaceWithLiteral)
112{
113    // Cases for 8Bit source.
114    String testString = "1224";
115    ASSERT_TRUE(testString.is8Bit());
116    testString.replaceWithLiteral('2', "");
117    ASSERT_STREQ("14", testString.utf8().data());
118
119    testString = "1224";
120    ASSERT_TRUE(testString.is8Bit());
121    testString.replaceWithLiteral('2', "3");
122    ASSERT_STREQ("1334", testString.utf8().data());
123
124    testString = "1224";
125    ASSERT_TRUE(testString.is8Bit());
126    testString.replaceWithLiteral('2', "555");
127    ASSERT_STREQ("15555554", testString.utf8().data());
128
129    testString = "1224";
130    ASSERT_TRUE(testString.is8Bit());
131    testString.replaceWithLiteral('3', "NotFound");
132    ASSERT_STREQ("1224", testString.utf8().data());
133
134    // Cases for 16Bit source.
135    testString = String::fromUTF8("résumé");
136    ASSERT_FALSE(testString.is8Bit());
137    testString.replaceWithLiteral(UChar(0x00E9 /*U+00E9 is 'é'*/), "e");
138    ASSERT_STREQ("resume", testString.utf8().data());
139
140    testString = String::fromUTF8("résumé");
141    ASSERT_FALSE(testString.is8Bit());
142    testString.replaceWithLiteral(UChar(0x00E9 /*U+00E9 is 'é'*/), "");
143    ASSERT_STREQ("rsum", testString.utf8().data());
144
145    testString = String::fromUTF8("résumé");
146    ASSERT_FALSE(testString.is8Bit());
147    testString.replaceWithLiteral('3', "NotFound");
148    ASSERT_STREQ("résumé", testString.utf8().data());
149}
150
151TEST(WTF, StringComparisonOfSameStringVectors)
152{
153    Vector<String> stringVector;
154    stringVector.append("one");
155    stringVector.append("two");
156
157    Vector<String> sameStringVector;
158    sameStringVector.append("one");
159    sameStringVector.append("two");
160
161    ASSERT_EQ(stringVector, sameStringVector);
162}
163
164TEST(WTF, SimplifyWhiteSpace)
165{
166    String extraSpaces("  Hello  world  ");
167    ASSERT_EQ(String("Hello world"), extraSpaces.simplifyWhiteSpace());
168    ASSERT_EQ(String("  Hello  world  "), extraSpaces.simplifyWhiteSpace(WTF::DoNotStripWhiteSpace));
169
170    String extraSpacesAndNewlines(" \nHello\n world\n ");
171    ASSERT_EQ(String("Hello world"), extraSpacesAndNewlines.simplifyWhiteSpace());
172    ASSERT_EQ(String("  Hello  world  "), extraSpacesAndNewlines.simplifyWhiteSpace(WTF::DoNotStripWhiteSpace));
173
174    String extraSpacesAndTabs(" \nHello\t world\t ");
175    ASSERT_EQ(String("Hello world"), extraSpacesAndTabs.simplifyWhiteSpace());
176    ASSERT_EQ(String("  Hello  world  "), extraSpacesAndTabs.simplifyWhiteSpace(WTF::DoNotStripWhiteSpace));
177}
178
// One row of a table-driven locale case-conversion test: a UTF-8 input, the
// set of locales to convert it under, and the UTF-8 result expected for every
// one of those locales. Field order matters because the tables are written as
// positional aggregate initializers.
struct CaseFoldingTestData {
    // Human-readable label streamed into the failure message.
    const char* sourceDescription;

    // UTF-8 encoded input text.
    const char* source;

    // Locale identifiers to run the conversion with, and how many there are.
    const char** localeList;
    size_t localeListLength;

    // UTF-8 encoded text the conversion must produce for each listed locale.
    const char* expected;
};
186
// Shared UTF-8 inputs for the locale-sensitive case-conversion tests.
//
// Turkic input:
//   \xC4\xB0 = U+0130 (capital dotted I)
//   \xC4\xB1 = U+0131 (lowercase dotless I)
const char* turkicInput = "Isi\xC4\xB0 \xC4\xB0s\xC4\xB1I";

// Greek input: space-separated words mixing upper- and lowercase Greek
// letters, including sigma at different positions within a word.
const char* greekInput = "\xCE\x9F\xCE\x94\xCE\x8C\xCE\xA3 \xCE\x9F\xCE\xB4\xCF\x8C\xCF\x82 \xCE\xA3\xCE\xBF \xCE\xA3\xCE\x9F o\xCE\xA3 \xCE\x9F\xCE\xA3 \xCF\x83 \xE1\xBC\x95\xCE\xBE";

// Lithuanian input: I/J-family letters, several followed by combining marks
// (\xCC\x87 = U+0307 combining dot above).
const char* lithuanianInput = "I \xC3\x8F J J\xCC\x88 \xC4\xAE \xC4\xAE\xCC\x88 \xC3\x8C \xC3\x8D \xC4\xA8 xi\xCC\x87\xCC\x88 xj\xCC\x87\xCC\x88 x\xC4\xAF\xCC\x87\xCC\x88 xi\xCC\x87\xCC\x80 xi\xCC\x87\xCC\x81 xi\xCC\x87\xCC\x83 XI X\xC3\x8F XJ XJ\xCC\x88 X\xC4\xAE X\xC4\xAE\xCC\x88";

// Locale identifier tables. Each language appears in several spellings
// (region subtag, underscore separator, keyword suffix, mixed case).
const char* turkicLocales[] = {
    "tr", "tr-TR", "tr_TR", "tr@foo=bar", "tr-US", "TR", "tr-tr", "tR",
    "az", "az-AZ", "az_AZ", "az@foo=bar", "az-US", "Az", "AZ-AZ",
};

const char* nonTurkicLocales[] = {
    "en", "en-US", "en_US", "en@foo=bar", "EN", "En",
    "ja", "el", "fil", "fi", "lt",
};

const char* greekLocales[] = {
    "el", "el-GR", "el_GR", "el@foo=bar", "el-US", "EL", "el-gr", "eL",
};

const char* nonGreekLocales[] = {
    "en", "en-US", "en_US", "en@foo=bar", "EN", "En",
    "ja", "tr", "az", "fil", "fi", "lt",
};

const char* lithuanianLocales[] = {
    "lt", "lt-LT", "lt_LT", "lt@foo=bar", "lt-US", "LT", "lt-lt", "lT",
};

// Should not have "tr" or "az" because "lt" and 'tr/az' rules conflict with each other.
const char* nonLithuanianLocales[] = {
    "en", "en-US", "en_US", "en@foo=bar", "EN", "En",
    "ja", "fil", "fi", "el",
};
212
213TEST(WTF, StringToUpperLocale)
214{
215    CaseFoldingTestData testDataList[] = {
216        {
217            "Turkic input",
218            turkicInput,
219            turkicLocales,
220            sizeof(turkicLocales) / sizeof(const char*),
221            "IS\xC4\xB0\xC4\xB0 \xC4\xB0SII",
222        }, {
223            "Turkic input",
224            turkicInput,
225            nonTurkicLocales,
226            sizeof(nonTurkicLocales) / sizeof(const char*),
227            "ISI\xC4\xB0 \xC4\xB0SII",
228        }, {
229            "Greek input",
230            greekInput,
231            greekLocales,
232            sizeof(greekLocales) / sizeof(const char*),
233            "\xCE\x9F\xCE\x94\xCE\x9F\xCE\xA3 \xCE\x9F\xCE\x94\xCE\x9F\xCE\xA3 \xCE\xA3\xCE\x9F \xCE\xA3\xCE\x9F \x4F\xCE\xA3 \xCE\x9F\xCE\xA3 \xCE\xA3 \xCE\x95\xCE\x9E",
234        }, {
235            "Greek input",
236            greekInput,
237            nonGreekLocales,
238            sizeof(nonGreekLocales) / sizeof(const char*),
239            "\xCE\x9F\xCE\x94\xCE\x8C\xCE\xA3 \xCE\x9F\xCE\x94\xCE\x8C\xCE\xA3 \xCE\xA3\xCE\x9F \xCE\xA3\xCE\x9F \x4F\xCE\xA3 \xCE\x9F\xCE\xA3 \xCE\xA3 \xE1\xBC\x9D\xCE\x9E",
240        }, {
241            "Lithuanian input",
242            lithuanianInput,
243            lithuanianLocales,
244            sizeof(lithuanianLocales) / sizeof(const char*),
245            "I \xC3\x8F J J\xCC\x88 \xC4\xAE \xC4\xAE\xCC\x88 \xC3\x8C \xC3\x8D \xC4\xA8 XI\xCC\x88 XJ\xCC\x88 X\xC4\xAE\xCC\x88 XI\xCC\x80 XI\xCC\x81 XI\xCC\x83 XI X\xC3\x8F XJ XJ\xCC\x88 X\xC4\xAE X\xC4\xAE\xCC\x88",
246        }, {
247            "Lithuanian input",
248            lithuanianInput,
249            nonLithuanianLocales,
250            sizeof(nonLithuanianLocales) / sizeof(const char*),
251            "I \xC3\x8F J J\xCC\x88 \xC4\xAE \xC4\xAE\xCC\x88 \xC3\x8C \xC3\x8D \xC4\xA8 XI\xCC\x87\xCC\x88 XJ\xCC\x87\xCC\x88 X\xC4\xAE\xCC\x87\xCC\x88 XI\xCC\x87\xCC\x80 XI\xCC\x87\xCC\x81 XI\xCC\x87\xCC\x83 XI X\xC3\x8F XJ XJ\xCC\x88 X\xC4\xAE X\xC4\xAE\xCC\x88",
252        },
253    };
254
255    for (size_t i = 0; i < sizeof(testDataList) / sizeof(testDataList[0]); ++i) {
256        const char* expected = testDataList[i].expected;
257        String source = String::fromUTF8(testDataList[i].source);
258        for (size_t j = 0; j < testDataList[i].localeListLength; ++j) {
259            const char* locale = testDataList[i].localeList[j];
260            EXPECT_STREQ(expected, source.upper(locale).utf8().data()) << testDataList[i].sourceDescription << "; locale=" << locale;
261        }
262    }
263}
264
265TEST(WTF, StringToLowerLocale)
266{
267    CaseFoldingTestData testDataList[] = {
268        {
269            "Turkic input",
270            turkicInput,
271            turkicLocales,
272            sizeof(turkicLocales) / sizeof(const char*),
273            "\xC4\xB1sii is\xC4\xB1\xC4\xB1",
274        }, {
275            "Turkic input",
276            turkicInput,
277            nonTurkicLocales,
278            sizeof(nonTurkicLocales) / sizeof(const char*),
279            // U+0130 is lowercased to U+0069 followed by U+0307
280            "isii\xCC\x87 i\xCC\x87s\xC4\xB1i",
281        }, {
282            "Greek input",
283            greekInput,
284            greekLocales,
285            sizeof(greekLocales) / sizeof(const char*),
286            "\xCE\xBF\xCE\xB4\xCF\x8C\xCF\x82 \xCE\xBF\xCE\xB4\xCF\x8C\xCF\x82 \xCF\x83\xCE\xBF \xCF\x83\xCE\xBF \x6F\xCF\x82 \xCE\xBF\xCF\x82 \xCF\x83 \xE1\xBC\x95\xCE\xBE",
287        }, {
288            "Greek input",
289            greekInput,
290            nonGreekLocales,
291            sizeof(greekLocales) / sizeof(const char*),
292            "\xCE\xBF\xCE\xB4\xCF\x8C\xCF\x82 \xCE\xBF\xCE\xB4\xCF\x8C\xCF\x82 \xCF\x83\xCE\xBF \xCF\x83\xCE\xBF \x6F\xCF\x82 \xCE\xBF\xCF\x82 \xCF\x83 \xE1\xBC\x95\xCE\xBE",
293        }, {
294            "Lithuanian input",
295            lithuanianInput,
296            lithuanianLocales,
297            sizeof(lithuanianLocales) / sizeof(const char*),
298            "i \xC3\xAF j j\xCC\x87\xCC\x88 \xC4\xAF \xC4\xAF\xCC\x87\xCC\x88 i\xCC\x87\xCC\x80 i\xCC\x87\xCC\x81 i\xCC\x87\xCC\x83 xi\xCC\x87\xCC\x88 xj\xCC\x87\xCC\x88 x\xC4\xAF\xCC\x87\xCC\x88 xi\xCC\x87\xCC\x80 xi\xCC\x87\xCC\x81 xi\xCC\x87\xCC\x83 xi x\xC3\xAF xj xj\xCC\x87\xCC\x88 x\xC4\xAF x\xC4\xAF\xCC\x87\xCC\x88",
299        }, {
300            "Lithuanian input",
301            lithuanianInput,
302            nonLithuanianLocales,
303            sizeof(nonLithuanianLocales) / sizeof(const char*),
304            "\x69 \xC3\xAF \x6A \x6A\xCC\x88 \xC4\xAF \xC4\xAF\xCC\x88 \xC3\xAC \xC3\xAD \xC4\xA9 \x78\x69\xCC\x87\xCC\x88 \x78\x6A\xCC\x87\xCC\x88 \x78\xC4\xAF\xCC\x87\xCC\x88 \x78\x69\xCC\x87\xCC\x80 \x78\x69\xCC\x87\xCC\x81 \x78\x69\xCC\x87\xCC\x83 \x78\x69 \x78\xC3\xAF \x78\x6A \x78\x6A\xCC\x88 \x78\xC4\xAF \x78\xC4\xAF\xCC\x88",
305        },
306    };
307
308    for (size_t i = 0; i < sizeof(testDataList) / sizeof(testDataList[0]); ++i) {
309        const char* expected = testDataList[i].expected;
310        String source = String::fromUTF8(testDataList[i].source);
311        for (size_t j = 0; j < testDataList[i].localeListLength; ++j) {
312            const char* locale = testDataList[i].localeList[j];
313            EXPECT_STREQ(expected, source.lower(locale).utf8().data()) << testDataList[i].sourceDescription << "; locale=" << locale;
314        }
315    }
316}
317
318} // namespace
319