1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/strings/string_util.h"
6
7#include <math.h>
8#include <stdarg.h>
9
10#include <algorithm>
11
12#include "base/basictypes.h"
13#include "base/strings/string16.h"
14#include "base/strings/utf_string_conversions.h"
15#include "testing/gmock/include/gmock/gmock.h"
16#include "testing/gtest/include/gtest/gtest.h"
17
18using ::testing::ElementsAre;
19
20namespace base {
21
22static const struct trim_case {
23  const wchar_t* input;
24  const TrimPositions positions;
25  const wchar_t* output;
26  const TrimPositions return_value;
27} trim_cases[] = {
28  {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
29  {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
30  {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
31  {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
32  {L"", TRIM_ALL, L"", TRIM_NONE},
33  {L"  ", TRIM_LEADING, L"", TRIM_LEADING},
34  {L"  ", TRIM_TRAILING, L"", TRIM_TRAILING},
35  {L"  ", TRIM_ALL, L"", TRIM_ALL},
36  {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
37  {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
38};
39
40static const struct trim_case_ascii {
41  const char* input;
42  const TrimPositions positions;
43  const char* output;
44  const TrimPositions return_value;
45} trim_cases_ascii[] = {
46  {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
47  {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
48  {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
49  {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
50  {"", TRIM_ALL, "", TRIM_NONE},
51  {"  ", TRIM_LEADING, "", TRIM_LEADING},
52  {"  ", TRIM_TRAILING, "", TRIM_TRAILING},
53  {"  ", TRIM_ALL, "", TRIM_ALL},
54  {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
55};
56
57namespace {
58
59// Helper used to test TruncateUTF8ToByteSize.
60bool Truncated(const std::string& input,
61               const size_t byte_size,
62               std::string* output) {
63    size_t prev = input.length();
64    TruncateUTF8ToByteSize(input, byte_size, output);
65    return prev != output->length();
66}
67
68}  // namespace
69
70TEST(StringUtilTest, TruncateUTF8ToByteSize) {
71  std::string output;
72
73  // Empty strings and invalid byte_size arguments
74  EXPECT_FALSE(Truncated(std::string(), 0, &output));
75  EXPECT_EQ(output, "");
76  EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
77  EXPECT_EQ(output, "");
78  EXPECT_FALSE(Truncated("\xe1\x80\xbf", static_cast<size_t>(-1), &output));
79  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
80
81  // Testing the truncation of valid UTF8 correctly
82  EXPECT_TRUE(Truncated("abc", 2, &output));
83  EXPECT_EQ(output, "ab");
84  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
85  EXPECT_EQ(output.compare("\xc2\x81"), 0);
86  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
87  EXPECT_EQ(output.compare("\xc2\x81"), 0);
88  EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
89  EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
90
91  {
92    const char array[] = "\x00\x00\xc2\x81\xc2\x81";
93    const std::string array_string(array, arraysize(array));
94    EXPECT_TRUE(Truncated(array_string, 4, &output));
95    EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
96  }
97
98  {
99    const char array[] = "\x00\xc2\x81\xc2\x81";
100    const std::string array_string(array, arraysize(array));
101    EXPECT_TRUE(Truncated(array_string, 4, &output));
102    EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
103  }
104
105  // Testing invalid UTF8
106  EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
107  EXPECT_EQ(output.compare(""), 0);
108  EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
109  EXPECT_EQ(output.compare(""), 0);
110  EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
111  EXPECT_EQ(output.compare(""), 0);
112
113  // Testing invalid UTF8 mixed with valid UTF8
114  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
115  EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
116  EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
117  EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
118  EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
119              10, &output));
120  EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
121  EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
122              10, &output));
123  EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
124  EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
125  EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
126
127  // Overlong sequences
128  EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
129  EXPECT_EQ(output.compare(""), 0);
130  EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
131  EXPECT_EQ(output.compare(""), 0);
132  EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
133  EXPECT_EQ(output.compare(""), 0);
134  EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
135  EXPECT_EQ(output.compare(""), 0);
136  EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
137  EXPECT_EQ(output.compare(""), 0);
138  EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
139  EXPECT_EQ(output.compare(""), 0);
140  EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
141  EXPECT_EQ(output.compare(""), 0);
142  EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
143  EXPECT_EQ(output.compare(""), 0);
144  EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
145  EXPECT_EQ(output.compare(""), 0);
146  EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
147  EXPECT_EQ(output.compare(""), 0);
148  EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
149  EXPECT_EQ(output.compare(""), 0);
150
151  // Beyond U+10FFFF (the upper limit of Unicode codespace)
152  EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
153  EXPECT_EQ(output.compare(""), 0);
154  EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
155  EXPECT_EQ(output.compare(""), 0);
156  EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
157  EXPECT_EQ(output.compare(""), 0);
158
159  // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
160  EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
161  EXPECT_EQ(output.compare(""), 0);
162  EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
163  EXPECT_EQ(output.compare(""), 0);
164
165  {
166    const char array[] = "\x00\x00\xfe\xff";
167    const std::string array_string(array, arraysize(array));
168    EXPECT_TRUE(Truncated(array_string, 4, &output));
169    EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
170  }
171
172  // Variants on the previous test
173  {
174    const char array[] = "\xff\xfe\x00\x00";
175    const std::string array_string(array, 4);
176    EXPECT_FALSE(Truncated(array_string, 4, &output));
177    EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
178  }
179  {
180    const char array[] = "\xff\x00\x00\xfe";
181    const std::string array_string(array, arraysize(array));
182    EXPECT_TRUE(Truncated(array_string, 4, &output));
183    EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
184  }
185
186  // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
187  EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
188  EXPECT_EQ(output.compare(""), 0);
189  EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
190  EXPECT_EQ(output.compare(""), 0);
191  EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
192  EXPECT_EQ(output.compare(""), 0);
193  EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
194  EXPECT_EQ(output.compare(""), 0);
195  EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
196  EXPECT_EQ(output.compare(""), 0);
197
198  // Strings in legacy encodings that are valid in UTF-8, but
199  // are invalid as UTF-8 in real data.
200  EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
201  EXPECT_EQ(output.compare("caf"), 0);
202  EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
203  EXPECT_EQ(output.compare(""), 0);
204  EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
205  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
206  EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
207              &output));
208  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
209
210  // Testing using the same string as input and output.
211  EXPECT_FALSE(Truncated(output, 4, &output));
212  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
213  EXPECT_TRUE(Truncated(output, 3, &output));
214  EXPECT_EQ(output.compare("\xa7\x41"), 0);
215
216  // "abc" with U+201[CD] in windows-125[0-8]
217  EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
218  EXPECT_EQ(output.compare("\x93" "abc"), 0);
219
220  // U+0639 U+064E U+0644 U+064E in ISO-8859-6
221  EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
222  EXPECT_EQ(output.compare(""), 0);
223
224  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
225  EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
226  EXPECT_EQ(output.compare(""), 0);
227}
228
229TEST(StringUtilTest, TrimWhitespace) {
230  string16 output;  // Allow contents to carry over to next testcase
231  for (size_t i = 0; i < arraysize(trim_cases); ++i) {
232    const trim_case& value = trim_cases[i];
233    EXPECT_EQ(value.return_value,
234              TrimWhitespace(WideToUTF16(value.input), value.positions,
235                             &output));
236    EXPECT_EQ(WideToUTF16(value.output), output);
237  }
238
239  // Test that TrimWhitespace() can take the same string for input and output
240  output = ASCIIToUTF16("  This is a test \r\n");
241  EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
242  EXPECT_EQ(ASCIIToUTF16("This is a test"), output);
243
244  // Once more, but with a string of whitespace
245  output = ASCIIToUTF16("  \r\n");
246  EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
247  EXPECT_EQ(string16(), output);
248
249  std::string output_ascii;
250  for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
251    const trim_case_ascii& value = trim_cases_ascii[i];
252    EXPECT_EQ(value.return_value,
253              TrimWhitespace(value.input, value.positions, &output_ascii));
254    EXPECT_EQ(value.output, output_ascii);
255  }
256}
257
// One CollapseWhitespace() scenario for wide text.
//   input  - text to collapse.
//   trim   - flag forwarded as the second argument of CollapseWhitespace().
//   output - expected result.
struct collapse_case {
  const wchar_t* input;
  const bool trim;
  const wchar_t* output;
};

static const collapse_case collapse_cases[] = {
  // Flag off.
  {L" Google Video ", false, L"Google Video"},
  {L"Google Video", false, L"Google Video"},
  {L"", false, L""},
  {L"  ", false, L""},
  {L"\t\rTest String\n", false, L"Test String"},
  {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
  {L"    Test     \n  \t String    ", false, L"Test String"},
  {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
  {L"   Test String", false, L"Test String"},
  {L"Test String    ", false, L"Test String"},
  {L"Test String", false, L"Test String"},

  // Flag on. In the last entry the whitespace runs that contain a CR or LF
  // vanish completely, while the run without one (" \t ") becomes one space.
  {L"", true, L""},
  {L"\n", true, L""},
  {L"  \r  ", true, L""},
  {L"\nFoo", true, L"Foo"},
  {L"\r  Foo  ", true, L"Foo"},
  {L" Foo bar ", true, L"Foo bar"},
  {L"  \tFoo  bar  \n", true, L"Foo bar"},
  {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
283
284TEST(StringUtilTest, CollapseWhitespace) {
285  for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
286    const collapse_case& value = collapse_cases[i];
287    EXPECT_EQ(WideToUTF16(value.output),
288              CollapseWhitespace(WideToUTF16(value.input), value.trim));
289  }
290}
291
// One CollapseWhitespaceASCII() scenario.
//   input  - text to collapse.
//   trim   - flag forwarded as the second argument of
//            CollapseWhitespaceASCII().
//   output - expected result.
struct collapse_case_ascii {
  const char* input;
  const bool trim;
  const char* output;
};

static const collapse_case_ascii collapse_cases_ascii[] = {
  // Flag off.
  {" Google Video ", false, "Google Video"},
  {"Google Video", false, "Google Video"},
  {"", false, ""},
  {"  ", false, ""},
  {"\t\rTest String\n", false, "Test String"},
  {"    Test     \n  \t String    ", false, "Test String"},
  {"   Test String", false, "Test String"},
  {"Test String    ", false, "Test String"},
  {"Test String", false, "Test String"},

  // Flag on. In the last entry the whitespace runs that contain a CR or LF
  // vanish completely, while the run without one (" \t ") becomes one space.
  {"", true, ""},
  {"\n", true, ""},
  {"  \r  ", true, ""},
  {"\nFoo", true, "Foo"},
  {"\r  Foo  ", true, "Foo"},
  {" Foo bar ", true, "Foo bar"},
  {"  \tFoo  bar  \n", true, "Foo bar"},
  {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
};
315
316TEST(StringUtilTest, CollapseWhitespaceASCII) {
317  for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
318    const collapse_case_ascii& value = collapse_cases_ascii[i];
319    EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
320  }
321}
322
323TEST(StringUtilTest, IsStringUTF8) {
324  EXPECT_TRUE(IsStringUTF8("abc"));
325  EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
326  EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
327  EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
328  EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
329  EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM
330
331  // surrogate code points
332  EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
333  EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
334  EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
335
336  // overlong sequences
337  EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
338  EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "AB"
339  EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
340  EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
341  EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
342  EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
343  EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
344  EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
345  EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
346  EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
347  EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+00A5
348
349  // Beyond U+10FFFF (the upper limit of Unicode codespace)
350  EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
351  EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
352  EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes
353
354  // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
355  EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
356  EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
357  EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
358  EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
359
360  // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
361  EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE)
362  EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));  // U+1FFFE
363  EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+10FFFF
364  EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
365  EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF
366  // Strings in legacy encodings. We can certainly make up strings
367  // in a legacy encoding that are valid in UTF-8, but in real data,
368  // most of them are invalid as UTF-8.
369  EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
370  EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC001 in EUC-KR
371  EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
372  // "abc" with U+201[CD] in windows-125[0-8]
373  EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
374  // U+0639 U+064E U+0644 U+064E in ISO-8859-6
375  EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
376  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
377  EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
378
379  // Check that we support Embedded Nulls. The first uses the canonical UTF-8
380  // representation, and the second uses a 2-byte sequence. The second version
381  // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
382  // given codepoint must be used.
383  static const char kEmbeddedNull[] = "embedded\0null";
384  EXPECT_TRUE(IsStringUTF8(
385      std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
386  EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
387}
388
389TEST(StringUtilTest, ConvertASCII) {
390  static const char* char_cases[] = {
391    "Google Video",
392    "Hello, world\n",
393    "0123ABCDwxyz \a\b\t\r\n!+,.~"
394  };
395
396  static const wchar_t* const wchar_cases[] = {
397    L"Google Video",
398    L"Hello, world\n",
399    L"0123ABCDwxyz \a\b\t\r\n!+,.~"
400  };
401
402  for (size_t i = 0; i < arraysize(char_cases); ++i) {
403    EXPECT_TRUE(IsStringASCII(char_cases[i]));
404    string16 utf16 = ASCIIToUTF16(char_cases[i]);
405    EXPECT_EQ(WideToUTF16(wchar_cases[i]), utf16);
406
407    std::string ascii = UTF16ToASCII(WideToUTF16(wchar_cases[i]));
408    EXPECT_EQ(char_cases[i], ascii);
409  }
410
411  EXPECT_FALSE(IsStringASCII("Google \x80Video"));
412
413  // Convert empty strings.
414  string16 empty16;
415  std::string empty;
416  EXPECT_EQ(empty, UTF16ToASCII(empty16));
417  EXPECT_EQ(empty16, ASCIIToUTF16(empty));
418
419  // Convert strings with an embedded NUL character.
420  const char chars_with_nul[] = "test\0string";
421  const int length_with_nul = arraysize(chars_with_nul) - 1;
422  std::string string_with_nul(chars_with_nul, length_with_nul);
423  std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
424  EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),
425            wide_with_nul.length());
426  std::string narrow_with_nul = UTF16ToASCII(WideToUTF16(wide_with_nul));
427  EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
428            narrow_with_nul.length());
429  EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
430}
431
432TEST(StringUtilTest, ToUpperASCII) {
433  EXPECT_EQ('C', ToUpperASCII('C'));
434  EXPECT_EQ('C', ToUpperASCII('c'));
435  EXPECT_EQ('2', ToUpperASCII('2'));
436
437  EXPECT_EQ(L'C', ToUpperASCII(L'C'));
438  EXPECT_EQ(L'C', ToUpperASCII(L'c'));
439  EXPECT_EQ(L'2', ToUpperASCII(L'2'));
440
441  std::string in_place_a("Cc2");
442  StringToUpperASCII(&in_place_a);
443  EXPECT_EQ("CC2", in_place_a);
444
445  std::wstring in_place_w(L"Cc2");
446  StringToUpperASCII(&in_place_w);
447  EXPECT_EQ(L"CC2", in_place_w);
448
449  std::string original_a("Cc2");
450  std::string upper_a = StringToUpperASCII(original_a);
451  EXPECT_EQ("CC2", upper_a);
452
453  std::wstring original_w(L"Cc2");
454  std::wstring upper_w = StringToUpperASCII(original_w);
455  EXPECT_EQ(L"CC2", upper_w);
456}
457
458TEST(StringUtilTest, LowerCaseEqualsASCII) {
459  static const struct {
460    const char*    src_a;
461    const char*    dst;
462  } lowercase_cases[] = {
463    { "FoO", "foo" },
464    { "foo", "foo" },
465    { "FOO", "foo" },
466  };
467
468  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) {
469    EXPECT_TRUE(LowerCaseEqualsASCII(ASCIIToUTF16(lowercase_cases[i].src_a),
470                                     lowercase_cases[i].dst));
471    EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
472                                     lowercase_cases[i].dst));
473  }
474}
475
476TEST(StringUtilTest, FormatBytesUnlocalized) {
477  static const struct {
478    int64 bytes;
479    const char* expected;
480  } cases[] = {
481    // Expected behavior: we show one post-decimal digit when we have
482    // under two pre-decimal digits, except in cases where it makes no
483    // sense (zero or bytes).
484    // Since we switch units once we cross the 1000 mark, this keeps
485    // the display of file sizes or bytes consistently around three
486    // digits.
487    {0, "0 B"},
488    {512, "512 B"},
489    {1024*1024, "1.0 MB"},
490    {1024*1024*1024, "1.0 GB"},
491    {10LL*1024*1024*1024, "10.0 GB"},
492    {99LL*1024*1024*1024, "99.0 GB"},
493    {105LL*1024*1024*1024, "105 GB"},
494    {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
495    {~(1LL<<63), "8192 PB"},
496
497    {99*1024 + 103, "99.1 kB"},
498    {1024*1024 + 103, "1.0 MB"},
499    {1024*1024 + 205 * 1024, "1.2 MB"},
500    {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
501    {10LL*1024*1024*1024, "10.0 GB"},
502    {100LL*1024*1024*1024, "100 GB"},
503  };
504
505  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
506    EXPECT_EQ(ASCIIToUTF16(cases[i].expected),
507              FormatBytesUnlocalized(cases[i].bytes));
508  }
509}
510TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
511  static const struct {
512    const char* str;
513    string16::size_type start_offset;
514    const char* find_this;
515    const char* replace_with;
516    const char* expected;
517  } cases[] = {
518    {"aaa", 0, "a", "b", "bbb"},
519    {"abb", 0, "ab", "a", "ab"},
520    {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
521    {"Not found", 0, "x", "0", "Not found"},
522    {"Not found again", 5, "x", "0", "Not found again"},
523    {" Making it much longer ", 0, " ", "Four score and seven years ago",
524     "Four score and seven years agoMakingFour score and seven years agoit"
525     "Four score and seven years agomuchFour score and seven years agolonger"
526     "Four score and seven years ago"},
527    {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
528    {"Replace me only me once", 9, "me ", "", "Replace me only once"},
529    {"abababab", 2, "ab", "c", "abccc"},
530  };
531
532  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
533    string16 str = ASCIIToUTF16(cases[i].str);
534    ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
535                                 ASCIIToUTF16(cases[i].find_this),
536                                 ASCIIToUTF16(cases[i].replace_with));
537    EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
538  }
539}
540
541TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
542  static const struct {
543    const char* str;
544    string16::size_type start_offset;
545    const char* find_this;
546    const char* replace_with;
547    const char* expected;
548  } cases[] = {
549    {"aaa", 0, "a", "b", "baa"},
550    {"abb", 0, "ab", "a", "ab"},
551    {"Removing some substrings inging", 0, "ing", "",
552      "Remov some substrings inging"},
553    {"Not found", 0, "x", "0", "Not found"},
554    {"Not found again", 5, "x", "0", "Not found again"},
555    {" Making it much longer ", 0, " ", "Four score and seven years ago",
556     "Four score and seven years agoMaking it much longer "},
557    {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
558    {"Replace me only me once", 4, "me ", "", "Replace only me once"},
559    {"abababab", 2, "ab", "c", "abcabab"},
560  };
561
562  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
563    string16 str = ASCIIToUTF16(cases[i].str);
564    ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
565                                     ASCIIToUTF16(cases[i].find_this),
566                                     ASCIIToUTF16(cases[i].replace_with));
567    EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
568  }
569}
570
571TEST(StringUtilTest, HexDigitToInt) {
572  EXPECT_EQ(0, HexDigitToInt('0'));
573  EXPECT_EQ(1, HexDigitToInt('1'));
574  EXPECT_EQ(2, HexDigitToInt('2'));
575  EXPECT_EQ(3, HexDigitToInt('3'));
576  EXPECT_EQ(4, HexDigitToInt('4'));
577  EXPECT_EQ(5, HexDigitToInt('5'));
578  EXPECT_EQ(6, HexDigitToInt('6'));
579  EXPECT_EQ(7, HexDigitToInt('7'));
580  EXPECT_EQ(8, HexDigitToInt('8'));
581  EXPECT_EQ(9, HexDigitToInt('9'));
582  EXPECT_EQ(10, HexDigitToInt('A'));
583  EXPECT_EQ(11, HexDigitToInt('B'));
584  EXPECT_EQ(12, HexDigitToInt('C'));
585  EXPECT_EQ(13, HexDigitToInt('D'));
586  EXPECT_EQ(14, HexDigitToInt('E'));
587  EXPECT_EQ(15, HexDigitToInt('F'));
588
589  // Verify the lower case as well.
590  EXPECT_EQ(10, HexDigitToInt('a'));
591  EXPECT_EQ(11, HexDigitToInt('b'));
592  EXPECT_EQ(12, HexDigitToInt('c'));
593  EXPECT_EQ(13, HexDigitToInt('d'));
594  EXPECT_EQ(14, HexDigitToInt('e'));
595  EXPECT_EQ(15, HexDigitToInt('f'));
596}
597
598// This checks where we can use the assignment operator for a va_list. We need
599// a way to do this since Visual C doesn't support va_copy, but assignment on
600// va_list is not guaranteed to be a copy. See StringAppendVT which uses this
601// capability.
602static void VariableArgsFunc(const char* format, ...) {
603  va_list org;
604  va_start(org, format);
605
606  va_list dup;
607  GG_VA_COPY(dup, org);
608  int i1 = va_arg(org, int);
609  int j1 = va_arg(org, int);
610  char* s1 = va_arg(org, char*);
611  double d1 = va_arg(org, double);
612  va_end(org);
613
614  int i2 = va_arg(dup, int);
615  int j2 = va_arg(dup, int);
616  char* s2 = va_arg(dup, char*);
617  double d2 = va_arg(dup, double);
618
619  EXPECT_EQ(i1, i2);
620  EXPECT_EQ(j1, j2);
621  EXPECT_STREQ(s1, s2);
622  EXPECT_EQ(d1, d2);
623
624  va_end(dup);
625}
626
627TEST(StringUtilTest, VAList) {
628  VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
629}
630
631// Test for Tokenize
632template <typename STR>
633void TokenizeTest() {
634  std::vector<STR> r;
635  size_t size;
636
637  size = Tokenize(STR("This is a string"), STR(" "), &r);
638  EXPECT_EQ(4U, size);
639  ASSERT_EQ(4U, r.size());
640  EXPECT_EQ(r[0], STR("This"));
641  EXPECT_EQ(r[1], STR("is"));
642  EXPECT_EQ(r[2], STR("a"));
643  EXPECT_EQ(r[3], STR("string"));
644  r.clear();
645
646  size = Tokenize(STR("one,two,three"), STR(","), &r);
647  EXPECT_EQ(3U, size);
648  ASSERT_EQ(3U, r.size());
649  EXPECT_EQ(r[0], STR("one"));
650  EXPECT_EQ(r[1], STR("two"));
651  EXPECT_EQ(r[2], STR("three"));
652  r.clear();
653
654  size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);
655  EXPECT_EQ(3U, size);
656  ASSERT_EQ(3U, r.size());
657  EXPECT_EQ(r[0], STR("one"));
658  EXPECT_EQ(r[1], STR("two"));
659  EXPECT_EQ(r[2], STR("three;four"));
660  r.clear();
661
662  size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);
663  EXPECT_EQ(4U, size);
664  ASSERT_EQ(4U, r.size());
665  EXPECT_EQ(r[0], STR("one"));
666  EXPECT_EQ(r[1], STR("two"));
667  EXPECT_EQ(r[2], STR("three"));
668  EXPECT_EQ(r[3], STR("four"));
669  r.clear();
670
671  size = Tokenize(STR("one, two, three"), STR(","), &r);
672  EXPECT_EQ(3U, size);
673  ASSERT_EQ(3U, r.size());
674  EXPECT_EQ(r[0], STR("one"));
675  EXPECT_EQ(r[1], STR(" two"));
676  EXPECT_EQ(r[2], STR(" three"));
677  r.clear();
678
679  size = Tokenize(STR("one, two, three, "), STR(","), &r);
680  EXPECT_EQ(4U, size);
681  ASSERT_EQ(4U, r.size());
682  EXPECT_EQ(r[0], STR("one"));
683  EXPECT_EQ(r[1], STR(" two"));
684  EXPECT_EQ(r[2], STR(" three"));
685  EXPECT_EQ(r[3], STR(" "));
686  r.clear();
687
688  size = Tokenize(STR("one, two, three,"), STR(","), &r);
689  EXPECT_EQ(3U, size);
690  ASSERT_EQ(3U, r.size());
691  EXPECT_EQ(r[0], STR("one"));
692  EXPECT_EQ(r[1], STR(" two"));
693  EXPECT_EQ(r[2], STR(" three"));
694  r.clear();
695
696  size = Tokenize(STR(), STR(","), &r);
697  EXPECT_EQ(0U, size);
698  ASSERT_EQ(0U, r.size());
699  r.clear();
700
701  size = Tokenize(STR(","), STR(","), &r);
702  EXPECT_EQ(0U, size);
703  ASSERT_EQ(0U, r.size());
704  r.clear();
705
706  size = Tokenize(STR(",;:."), STR(".:;,"), &r);
707  EXPECT_EQ(0U, size);
708  ASSERT_EQ(0U, r.size());
709  r.clear();
710
711  size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);
712  EXPECT_EQ(1U, size);
713  ASSERT_EQ(1U, r.size());
714  EXPECT_EQ(r[0], STR("a"));
715  r.clear();
716
717  size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);
718  EXPECT_EQ(2U, size);
719  ASSERT_EQ(2U, r.size());
720  EXPECT_EQ(r[0], STR("\ta\t"));
721  EXPECT_EQ(r[1], STR("b\tcc"));
722  r.clear();
723}
724
725TEST(StringUtilTest, TokenizeStdString) {
726  TokenizeTest<std::string>();
727}
728
729TEST(StringUtilTest, TokenizeStringPiece) {
730  TokenizeTest<base::StringPiece>();
731}
732
733// Test for JoinString
734TEST(StringUtilTest, JoinString) {
735  std::vector<std::string> in;
736  EXPECT_EQ("", JoinString(in, ','));
737
738  in.push_back("a");
739  EXPECT_EQ("a", JoinString(in, ','));
740
741  in.push_back("b");
742  in.push_back("c");
743  EXPECT_EQ("a,b,c", JoinString(in, ','));
744
745  in.push_back(std::string());
746  EXPECT_EQ("a,b,c,", JoinString(in, ','));
747  in.push_back(" ");
748  EXPECT_EQ("a|b|c|| ", JoinString(in, '|'));
749}
750
751// Test for JoinString overloaded with std::string separator
752TEST(StringUtilTest, JoinStringWithString) {
753  std::string separator(", ");
754  std::vector<std::string> parts;
755  EXPECT_EQ(std::string(), JoinString(parts, separator));
756
757  parts.push_back("a");
758  EXPECT_EQ("a", JoinString(parts, separator));
759
760  parts.push_back("b");
761  parts.push_back("c");
762  EXPECT_EQ("a, b, c", JoinString(parts, separator));
763
764  parts.push_back(std::string());
765  EXPECT_EQ("a, b, c, ", JoinString(parts, separator));
766  parts.push_back(" ");
767  EXPECT_EQ("a|b|c|| ", JoinString(parts, "|"));
768}
769
770// Test for JoinString overloaded with string16 separator
771TEST(StringUtilTest, JoinStringWithString16) {
772  string16 separator = ASCIIToUTF16(", ");
773  std::vector<string16> parts;
774  EXPECT_EQ(string16(), JoinString(parts, separator));
775
776  parts.push_back(ASCIIToUTF16("a"));
777  EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts, separator));
778
779  parts.push_back(ASCIIToUTF16("b"));
780  parts.push_back(ASCIIToUTF16("c"));
781  EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts, separator));
782
783  parts.push_back(ASCIIToUTF16(""));
784  EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts, separator));
785  parts.push_back(ASCIIToUTF16(" "));
786  EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts, ASCIIToUTF16("|")));
787}
788
789TEST(StringUtilTest, StartsWith) {
790  EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
791  EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
792  EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
793  EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
794  EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
795  EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
796  EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", false));
797  EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", true));
798  EXPECT_TRUE(StartsWithASCII("java", std::string(), false));
799  EXPECT_TRUE(StartsWithASCII("java", std::string(), true));
800
801  EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
802                         ASCIIToUTF16("javascript"), true));
803  EXPECT_FALSE(StartsWith(ASCIIToUTF16("JavaScript:url"),
804                          ASCIIToUTF16("javascript"), true));
805  EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
806                         ASCIIToUTF16("javascript"), false));
807  EXPECT_TRUE(StartsWith(ASCIIToUTF16("JavaScript:url"),
808                         ASCIIToUTF16("javascript"), false));
809  EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"),
810                          ASCIIToUTF16("javascript"), true));
811  EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"),
812                          ASCIIToUTF16("javascript"), false));
813  EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), false));
814  EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), true));
815  EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), false));
816  EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), true));
817}
818
819TEST(StringUtilTest, EndsWith) {
820  EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"),
821                       ASCIIToUTF16(".plugin"), true));
822  EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.Plugin"),
823                        ASCIIToUTF16(".plugin"), true));
824  EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"),
825                       ASCIIToUTF16(".plugin"), false));
826  EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.Plugin"),
827                       ASCIIToUTF16(".plugin"), false));
828  EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), true));
829  EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), false));
830  EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"),
831                        ASCIIToUTF16(".plugin"), true));
832  EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"),
833                        ASCIIToUTF16(".plugin"), false));
834  EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), false));
835  EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), true));
836  EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), false));
837  EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), true));
838  EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"),
839                       ASCIIToUTF16(".plugin"), false));
840  EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), ASCIIToUTF16(".plugin"), true));
841  EXPECT_TRUE(EndsWith(string16(), string16(), false));
842  EXPECT_TRUE(EndsWith(string16(), string16(), true));
843}
844
845TEST(StringUtilTest, GetStringFWithOffsets) {
846  std::vector<string16> subst;
847  subst.push_back(ASCIIToUTF16("1"));
848  subst.push_back(ASCIIToUTF16("2"));
849  std::vector<size_t> offsets;
850
851  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
852                            subst,
853                            &offsets);
854  EXPECT_EQ(2U, offsets.size());
855  EXPECT_EQ(7U, offsets[0]);
856  EXPECT_EQ(25U, offsets[1]);
857  offsets.clear();
858
859  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
860                            subst,
861                            &offsets);
862  EXPECT_EQ(2U, offsets.size());
863  EXPECT_EQ(25U, offsets[0]);
864  EXPECT_EQ(7U, offsets[1]);
865  offsets.clear();
866}
867
868TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
869  // Test whether replacestringplaceholders works as expected when there
870  // are fewer inputs than outputs.
871  std::vector<string16> subst;
872  subst.push_back(ASCIIToUTF16("9a"));
873  subst.push_back(ASCIIToUTF16("8b"));
874  subst.push_back(ASCIIToUTF16("7c"));
875
876  string16 formatted =
877      ReplaceStringPlaceholders(
878          ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);
879
880  EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
881}
882
883TEST(StringUtilTest, ReplaceStringPlaceholders) {
884  std::vector<string16> subst;
885  subst.push_back(ASCIIToUTF16("9a"));
886  subst.push_back(ASCIIToUTF16("8b"));
887  subst.push_back(ASCIIToUTF16("7c"));
888  subst.push_back(ASCIIToUTF16("6d"));
889  subst.push_back(ASCIIToUTF16("5e"));
890  subst.push_back(ASCIIToUTF16("4f"));
891  subst.push_back(ASCIIToUTF16("3g"));
892  subst.push_back(ASCIIToUTF16("2h"));
893  subst.push_back(ASCIIToUTF16("1i"));
894
895  string16 formatted =
896      ReplaceStringPlaceholders(
897          ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);
898
899  EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
900}
901
902TEST(StringUtilTest, ReplaceStringPlaceholdersMoreThan9Replacements) {
903  std::vector<string16> subst;
904  subst.push_back(ASCIIToUTF16("9a"));
905  subst.push_back(ASCIIToUTF16("8b"));
906  subst.push_back(ASCIIToUTF16("7c"));
907  subst.push_back(ASCIIToUTF16("6d"));
908  subst.push_back(ASCIIToUTF16("5e"));
909  subst.push_back(ASCIIToUTF16("4f"));
910  subst.push_back(ASCIIToUTF16("3g"));
911  subst.push_back(ASCIIToUTF16("2h"));
912  subst.push_back(ASCIIToUTF16("1i"));
913  subst.push_back(ASCIIToUTF16("0j"));
914  subst.push_back(ASCIIToUTF16("-1k"));
915  subst.push_back(ASCIIToUTF16("-2l"));
916  subst.push_back(ASCIIToUTF16("-3m"));
917  subst.push_back(ASCIIToUTF16("-4n"));
918
919  string16 formatted =
920      ReplaceStringPlaceholders(
921          ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i,"
922                       "$10j,$11k,$12l,$13m,$14n,$1"), subst, NULL);
923
924  EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,"
925                                    "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a"));
926}
927
928TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
929  std::vector<std::string> subst;
930  subst.push_back("9a");
931  subst.push_back("8b");
932  subst.push_back("7c");
933  subst.push_back("6d");
934  subst.push_back("5e");
935  subst.push_back("4f");
936  subst.push_back("3g");
937  subst.push_back("2h");
938  subst.push_back("1i");
939
940  std::string formatted =
941      ReplaceStringPlaceholders(
942          "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);
943
944  EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
945}
946
947TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
948  std::vector<std::string> subst;
949  subst.push_back("a");
950  subst.push_back("b");
951  subst.push_back("c");
952  EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL),
953            "$1 $$2 $$$3");
954}
955
956TEST(StringUtilTest, MatchPatternTest) {
957  EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
958  EXPECT_TRUE(MatchPattern("www.google.com", "*"));
959  EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
960  EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
961  EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
962  EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
963  EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
964  EXPECT_FALSE(MatchPattern("", "*.*"));
965  EXPECT_TRUE(MatchPattern("", "*"));
966  EXPECT_TRUE(MatchPattern("", "?"));
967  EXPECT_TRUE(MatchPattern("", ""));
968  EXPECT_FALSE(MatchPattern("Hello", ""));
969  EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
970  // Stop after a certain recursion depth.
971  EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
972
973  // Test UTF8 matching.
974  EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
975  EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
976  EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
977  // Invalid sequences should be handled as a single invalid character.
978  EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
979  // If the pattern has invalid characters, it shouldn't match anything.
980  EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
981
982  // Test UTF16 character matching.
983  EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
984                           UTF8ToUTF16("*.com")));
985  EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
986                           UTF8ToUTF16("He??o\\*1*")));
987
988  // This test verifies that consecutive wild cards are collapsed into 1
989  // wildcard (when this doesn't occur, MatchPattern reaches it's maximum
990  // recursion depth).
991  EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
992                           UTF8ToUTF16("He********************************o")));
993}
994
995TEST(StringUtilTest, LcpyTest) {
996  // Test the normal case where we fit in our buffer.
997  {
998    char dst[10];
999    wchar_t wdst[10];
1000    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1001    EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1002    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1003    EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1004  }
1005
1006  // Test dst_size == 0, nothing should be written to |dst| and we should
1007  // have the equivalent of strlen(src).
1008  {
1009    char dst[2] = {1, 2};
1010    wchar_t wdst[2] = {1, 2};
1011    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0));
1012    EXPECT_EQ(1, dst[0]);
1013    EXPECT_EQ(2, dst[1]);
1014    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0));
1015    EXPECT_EQ(static_cast<wchar_t>(1), wdst[0]);
1016    EXPECT_EQ(static_cast<wchar_t>(2), wdst[1]);
1017  }
1018
1019  // Test the case were we _just_ competely fit including the null.
1020  {
1021    char dst[8];
1022    wchar_t wdst[8];
1023    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1024    EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1025    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1026    EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1027  }
1028
1029  // Test the case were we we are one smaller, so we can't fit the null.
1030  {
1031    char dst[7];
1032    wchar_t wdst[7];
1033    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1034    EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
1035    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1036    EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
1037  }
1038
1039  // Test the case were we are just too small.
1040  {
1041    char dst[3];
1042    wchar_t wdst[3];
1043    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1044    EXPECT_EQ(0, memcmp(dst, "ab", 3));
1045    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1046    EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
1047  }
1048}
1049
1050TEST(StringUtilTest, WprintfFormatPortabilityTest) {
1051  static const struct {
1052    const wchar_t* input;
1053    bool portable;
1054  } cases[] = {
1055    { L"%ls", true },
1056    { L"%s", false },
1057    { L"%S", false },
1058    { L"%lS", false },
1059    { L"Hello, %s", false },
1060    { L"%lc", true },
1061    { L"%c", false },
1062    { L"%C", false },
1063    { L"%lC", false },
1064    { L"%ls %s", false },
1065    { L"%s %ls", false },
1066    { L"%s %ls %s", false },
1067    { L"%f", true },
1068    { L"%f %F", false },
1069    { L"%d %D", false },
1070    { L"%o %O", false },
1071    { L"%u %U", false },
1072    { L"%f %d %o %u", true },
1073    { L"%-8d (%02.1f%)", true },
1074    { L"% 10s", false },
1075    { L"% 10ls", true }
1076  };
1077  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
1078    EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input));
1079}
1080
1081TEST(StringUtilTest, RemoveChars) {
1082  const char* kRemoveChars = "-/+*";
1083  std::string input = "A-+bc/d!*";
1084  EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));
1085  EXPECT_EQ("Abcd!", input);
1086
1087  // No characters match kRemoveChars.
1088  EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1089  EXPECT_EQ("Abcd!", input);
1090
1091  // Empty string.
1092  input.clear();
1093  EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1094  EXPECT_EQ(std::string(), input);
1095}
1096
1097TEST(StringUtilTest, ReplaceChars) {
1098  struct TestData {
1099    const char* input;
1100    const char* replace_chars;
1101    const char* replace_with;
1102    const char* output;
1103    bool result;
1104  } cases[] = {
1105    { "", "", "", "", false },
1106    { "test", "", "", "test", false },
1107    { "test", "", "!", "test", false },
1108    { "test", "z", "!", "test", false },
1109    { "test", "e", "!", "t!st", true },
1110    { "test", "e", "!?", "t!?st", true },
1111    { "test", "ez", "!", "t!st", true },
1112    { "test", "zed", "!?", "t!?st", true },
1113    { "test", "t", "!?", "!?es!?", true },
1114    { "test", "et", "!>", "!>!>s!>", true },
1115    { "test", "zest", "!", "!!!!", true },
1116    { "test", "szt", "!", "!e!!", true },
1117    { "test", "t", "test", "testestest", true },
1118  };
1119
1120  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
1121    std::string output;
1122    bool result = ReplaceChars(cases[i].input,
1123                               cases[i].replace_chars,
1124                               cases[i].replace_with,
1125                               &output);
1126    EXPECT_EQ(cases[i].result, result);
1127    EXPECT_EQ(cases[i].output, output);
1128  }
1129}
1130
1131TEST(StringUtilTest, ContainsOnlyChars) {
1132  // Providing an empty list of characters should return false but for the empty
1133  // string.
1134  EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string()));
1135  EXPECT_FALSE(ContainsOnlyChars("Hello", std::string()));
1136
1137  EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234"));
1138  EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
1139  EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
1140  EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
1141  EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
1142
1143  EXPECT_TRUE(ContainsOnlyChars(std::string(), kWhitespaceASCII));
1144  EXPECT_TRUE(ContainsOnlyChars(" ", kWhitespaceASCII));
1145  EXPECT_TRUE(ContainsOnlyChars("\t", kWhitespaceASCII));
1146  EXPECT_TRUE(ContainsOnlyChars("\t \r \n  ", kWhitespaceASCII));
1147  EXPECT_FALSE(ContainsOnlyChars("a", kWhitespaceASCII));
1148  EXPECT_FALSE(ContainsOnlyChars("\thello\r \n  ", kWhitespaceASCII));
1149
1150  EXPECT_TRUE(ContainsOnlyChars(string16(), kWhitespaceUTF16));
1151  EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16(" "), kWhitespaceUTF16));
1152  EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t"), kWhitespaceUTF16));
1153  EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t \r \n  "), kWhitespaceUTF16));
1154  EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("a"), kWhitespaceUTF16));
1155  EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("\thello\r \n  "),
1156                                  kWhitespaceUTF16));
1157}
1158
1159class WriteIntoTest : public testing::Test {
1160 protected:
1161  static void WritesCorrectly(size_t num_chars) {
1162    std::string buffer;
1163    char kOriginal[] = "supercali";
1164    strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
1165    // Using std::string(buffer.c_str()) instead of |buffer| truncates the
1166    // string at the first \0.
1167    EXPECT_EQ(std::string(kOriginal,
1168                          std::min(num_chars, arraysize(kOriginal) - 1)),
1169              std::string(buffer.c_str()));
1170    EXPECT_EQ(num_chars, buffer.size());
1171  }
1172};
1173
1174TEST_F(WriteIntoTest, WriteInto) {
1175  // Validate that WriteInto reserves enough space and
1176  // sizes a string correctly.
1177  WritesCorrectly(1);
1178  WritesCorrectly(2);
1179  WritesCorrectly(5000);
1180
1181  // Validate that WriteInto doesn't modify other strings
1182  // when using a Copy-on-Write implementation.
1183  const char kLive[] = "live";
1184  const char kDead[] = "dead";
1185  const std::string live = kLive;
1186  std::string dead = live;
1187  strncpy(WriteInto(&dead, 5), kDead, 4);
1188  EXPECT_EQ(kDead, dead);
1189  EXPECT_EQ(4u, dead.size());
1190  EXPECT_EQ(kLive, live);
1191  EXPECT_EQ(4u, live.size());
1192}
1193
1194}  // namespace base
1195