1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <math.h>
6#include <stdarg.h>
7
8#include <limits>
9#include <sstream>
10
11#include "base/basictypes.h"
12#include "base/string_util.h"
13#include "base/utf_string_conversions.h"
14#include "testing/gmock/include/gmock/gmock.h"
15#include "testing/gtest/include/gtest/gtest.h"
16
17using ::testing::ElementsAre;
18
19namespace base {
20
// Wide-string cases consumed by the TrimWhitespace test. Each entry lists the
// string to trim, the positions requested, the expected trimmed string and the
// value the call is expected to return.
static const struct trim_case {
  const wchar_t* input;              // String handed to TrimWhitespace().
  const TrimPositions positions;     // Which end(s) the call is asked to trim.
  const wchar_t* output;             // Expected contents of the output string.
  const TrimPositions return_value;  // Expected return value of the call.
} trim_cases[] = {
  {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
  {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
  {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
  // Nothing to trim: TRIM_NONE is expected back.
  {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
  {L"", TRIM_ALL, L"", TRIM_NONE},
  // Whitespace-only inputs: the expected return value mirrors the request.
  {L"  ", TRIM_LEADING, L"", TRIM_LEADING},
  {L"  ", TRIM_TRAILING, L"", TRIM_TRAILING},
  {L"  ", TRIM_ALL, L"", TRIM_ALL},
  {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
  // Non-ASCII whitespace: U+2002 in front, U+00A0 and U+3000 at the end.
  {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
};
38
// Narrow-string counterpart of the wide trim table, consumed by the
// TrimWhitespace test. The entries match the wide table except that there is
// no non-ASCII whitespace case.
static const struct trim_case_ascii {
  const char* input;                 // String handed to TrimWhitespace().
  const TrimPositions positions;     // Which end(s) the call is asked to trim.
  const char* output;                // Expected contents of the output string.
  const TrimPositions return_value;  // Expected return value of the call.
} trim_cases_ascii[] = {
  {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
  {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
  {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
  // Nothing to trim: TRIM_NONE is expected back.
  {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
  {"", TRIM_ALL, "", TRIM_NONE},
  // Whitespace-only inputs: the expected return value mirrors the request.
  {"  ", TRIM_LEADING, "", TRIM_LEADING},
  {"  ", TRIM_TRAILING, "", TRIM_TRAILING},
  {"  ", TRIM_ALL, "", TRIM_ALL},
  {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
};
55
56namespace {
57
58// Helper used to test TruncateUTF8ToByteSize.
59bool Truncated(const std::string& input, const size_t byte_size,
60               std::string* output) {
61    size_t prev = input.length();
62    TruncateUTF8ToByteSize(input, byte_size, output);
63    return prev != output->length();
64}
65
66}  // namespace
67
// Exercises TruncateUTF8ToByteSize() through the Truncated() helper, which
// returns true when the output length differs from the input length.
// |output| is shared by every check below; the "same string as input and
// output" checks near the end depend on the value left by the check just
// before them, so the statement order matters.
TEST(StringUtilTest, TruncateUTF8ToByteSize) {
  std::string output;

  // Empty strings and invalid byte_size arguments
  EXPECT_FALSE(Truncated("", 0, &output));
  EXPECT_EQ(output, "");
  EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
  EXPECT_EQ(output, "");
  // -1 converts to a very large size_t; no truncation is expected.
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", -1, &output));
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));

  // Testing the truncation of valid UTF8 correctly
  EXPECT_TRUE(Truncated("abc", 2, &output));
  EXPECT_EQ(output, "ab");
  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
  EXPECT_EQ(output.compare("\xc2\x81"), 0);
  // A limit that falls inside a two-byte character is expected to drop that
  // character entirely.
  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
  EXPECT_EQ(output.compare("\xc2\x81"), 0);
  EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
  EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);

  // Inputs with embedded NULs are built from an array plus an explicit length
  // so the NUL bytes are kept.
  // NOTE(review): arraysize(array) presumably also counts the terminating
  // NUL of the literal -- confirm.
  {
    const char array[] = "\x00\x00\xc2\x81\xc2\x81";
    const std::string array_string(array, arraysize(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
  }

  {
    const char array[] = "\x00\xc2\x81\xc2\x81";
    const std::string array_string(array, arraysize(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
  }

  // Testing invalid UTF8
  EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
  EXPECT_EQ(output.compare(""), 0);

  // Testing invalid UTF8 mixed with valid UTF8
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
  EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
  EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
  EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
  EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
              10, &output));
  EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
  // The literal is split around "a" so that the 'a' is not read as part of
  // the preceding hex escape.
  EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
              10, &output));
  EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
  EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
  EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);

  // Overlong sequences
  EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
  EXPECT_EQ(output.compare(""), 0);

  // Beyond U+10FFFF (the upper limit of Unicode codespace)
  EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
  EXPECT_EQ(output.compare(""), 0);

  // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
  EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
  EXPECT_EQ(output.compare(""), 0);

  {
    const char array[] = "\x00\x00\xfe\xff";
    const std::string array_string(array, arraysize(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
  }

  // Variants on the previous test
  {
    const char array[] = "\xff\xfe\x00\x00";
    // Unlike its neighbours, this string is built with an explicit length of
    // 4 rather than arraysize(array).
    const std::string array_string(array, 4);
    EXPECT_FALSE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
  }
  {
    const char array[] = "\xff\x00\x00\xfe";
    const std::string array_string(array, arraysize(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
  }

  // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
  EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
  EXPECT_EQ(output.compare(""), 0);

  // Strings in legacy encodings that are valid in UTF-8, but
  // are invalid as UTF-8 in real data.
  EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
  EXPECT_EQ(output.compare("caf"), 0);
  EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
  EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
              &output));
  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);

  // Testing using the same string as input and output.
  // |output| still holds "\xa7\x41\xa6\x6e" from the check above.
  EXPECT_FALSE(Truncated(output, 4, &output));
  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
  EXPECT_TRUE(Truncated(output, 3, &output));
  EXPECT_EQ(output.compare("\xa7\x41"), 0);

  // "abc" with U+201[CD] in windows-125[0-8]
  EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
  EXPECT_EQ(output.compare("\x93" "abc"), 0);

  // U+0639 U+064E U+0644 U+064E in ISO-8859-6
  EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
  EXPECT_EQ(output.compare(""), 0);

  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
  EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
}
226
227TEST(StringUtilTest, TrimWhitespace) {
228  std::wstring output;  // Allow contents to carry over to next testcase
229  for (size_t i = 0; i < arraysize(trim_cases); ++i) {
230    const trim_case& value = trim_cases[i];
231    EXPECT_EQ(value.return_value,
232              TrimWhitespace(value.input, value.positions, &output));
233    EXPECT_EQ(value.output, output);
234  }
235
236  // Test that TrimWhitespace() can take the same string for input and output
237  output = L"  This is a test \r\n";
238  EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
239  EXPECT_EQ(L"This is a test", output);
240
241  // Once more, but with a string of whitespace
242  output = L"  \r\n";
243  EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
244  EXPECT_EQ(L"", output);
245
246  std::string output_ascii;
247  for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
248    const trim_case_ascii& value = trim_cases_ascii[i];
249    EXPECT_EQ(value.return_value,
250              TrimWhitespace(value.input, value.positions, &output_ascii));
251    EXPECT_EQ(value.output, output_ascii);
252  }
253}
254
// Wide-string cases consumed by the CollapseWhitespace test.
// Judging from the expected outputs: leading and trailing whitespace is
// removed and interior whitespace runs become a single space; when |trim| is
// true, an interior run containing a line break is removed entirely (see the
// last entry, where only the " \t " between 'e' and 'f' survives as a space).
static const struct collapse_case {
  const wchar_t* input;   // String handed to CollapseWhitespace().
  const bool trim;        // Second argument passed to CollapseWhitespace().
  const wchar_t* output;  // Expected return value.
} collapse_cases[] = {
  {L" Google Video ", false, L"Google Video"},
  {L"Google Video", false, L"Google Video"},
  {L"", false, L""},
  {L"  ", false, L""},
  {L"\t\rTest String\n", false, L"Test String"},
  // Non-ASCII whitespace (U+2002, U+00A0, U+3000, U+1680, U+2028).
  {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
  {L"    Test     \n  \t String    ", false, L"Test String"},
  {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
  {L"   Test String", false, L"Test String"},
  {L"Test String    ", false, L"Test String"},
  {L"Test String", false, L"Test String"},
  // Cases with |trim| set.
  {L"", true, L""},
  {L"\n", true, L""},
  {L"  \r  ", true, L""},
  {L"\nFoo", true, L"Foo"},
  {L"\r  Foo  ", true, L"Foo"},
  {L" Foo bar ", true, L"Foo bar"},
  {L"  \tFoo  bar  \n", true, L"Foo bar"},
  {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
280
281TEST(StringUtilTest, CollapseWhitespace) {
282  for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
283    const collapse_case& value = collapse_cases[i];
284    EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim));
285  }
286}
287
// Narrow-string cases consumed by the CollapseWhitespaceASCII test. The
// entries match the wide table minus the non-ASCII whitespace cases.
static const struct collapse_case_ascii {
  const char* input;   // String handed to CollapseWhitespaceASCII().
  const bool trim;     // Second argument passed to CollapseWhitespaceASCII().
  const char* output;  // Expected return value.
} collapse_cases_ascii[] = {
  {" Google Video ", false, "Google Video"},
  {"Google Video", false, "Google Video"},
  {"", false, ""},
  {"  ", false, ""},
  {"\t\rTest String\n", false, "Test String"},
  {"    Test     \n  \t String    ", false, "Test String"},
  {"   Test String", false, "Test String"},
  {"Test String    ", false, "Test String"},
  {"Test String", false, "Test String"},
  // Cases with |trim| set. In the last entry, whitespace runs that contain a
  // line break are expected to disappear entirely, while the " \t " between
  // 'e' and 'f' is expected to become a single space.
  {"", true, ""},
  {"\n", true, ""},
  {"  \r  ", true, ""},
  {"\nFoo", true, "Foo"},
  {"\r  Foo  ", true, "Foo"},
  {" Foo bar ", true, "Foo bar"},
  {"  \tFoo  bar  \n", true, "Foo bar"},
  {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
};
311
312TEST(StringUtilTest, CollapseWhitespaceASCII) {
313  for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
314    const collapse_case_ascii& value = collapse_cases_ascii[i];
315    EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
316  }
317}
318
319TEST(StringUtilTest, ContainsOnlyWhitespaceASCII) {
320  EXPECT_TRUE(ContainsOnlyWhitespaceASCII(""));
321  EXPECT_TRUE(ContainsOnlyWhitespaceASCII(" "));
322  EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t"));
323  EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t \r \n  "));
324  EXPECT_FALSE(ContainsOnlyWhitespaceASCII("a"));
325  EXPECT_FALSE(ContainsOnlyWhitespaceASCII("\thello\r \n  "));
326}
327
328TEST(StringUtilTest, ContainsOnlyWhitespace) {
329  EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("")));
330  EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(" ")));
331  EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t")));
332  EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t \r \n  ")));
333  EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("a")));
334  EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("\thello\r \n  ")));
335}
336
337TEST(StringUtilTest, IsStringUTF8) {
338  EXPECT_TRUE(IsStringUTF8("abc"));
339  EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
340  EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
341  EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
342  EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
343  EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM
344
345  // surrogate code points
346  EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
347  EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
348  EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
349
350  // overlong sequences
351  EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
352  EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "AB"
353  EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
354  EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
355  EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
356  EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
357  EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
358  EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
359  EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
360  EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
361  EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+00A5
362
363  // Beyond U+10FFFF (the upper limit of Unicode codespace)
364  EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
365  EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
366  EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes
367
368  // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
369  EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
370  EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
371  EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
372  EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
373
374  // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
375  EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE)
376  EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));  // U+1FFFE
377  EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+10FFFF
378  EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
379  EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF
380  // Strings in legacy encodings. We can certainly make up strings
381  // in a legacy encoding that are valid in UTF-8, but in real data,
382  // most of them are invalid as UTF-8.
383  EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
384  EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC001 in EUC-KR
385  EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
386  // "abc" with U+201[CD] in windows-125[0-8]
387  EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
388  // U+0639 U+064E U+0644 U+064E in ISO-8859-6
389  EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
390  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
391  EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
392
393  // Check that we support Embedded Nulls. The first uses the canonical UTF-8
394  // representation, and the second uses a 2-byte sequence. The second version
395  // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
396  // given codepoint must be used.
397  static const char kEmbeddedNull[] = "embedded\0null";
398  EXPECT_TRUE(IsStringUTF8(
399      std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
400  EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
401}
402
403TEST(StringUtilTest, ConvertASCII) {
404  static const char* char_cases[] = {
405    "Google Video",
406    "Hello, world\n",
407    "0123ABCDwxyz \a\b\t\r\n!+,.~"
408  };
409
410  static const wchar_t* const wchar_cases[] = {
411    L"Google Video",
412    L"Hello, world\n",
413    L"0123ABCDwxyz \a\b\t\r\n!+,.~"
414  };
415
416  for (size_t i = 0; i < arraysize(char_cases); ++i) {
417    EXPECT_TRUE(IsStringASCII(char_cases[i]));
418    std::wstring wide = ASCIIToWide(char_cases[i]);
419    EXPECT_EQ(wchar_cases[i], wide);
420
421    EXPECT_TRUE(IsStringASCII(wchar_cases[i]));
422    std::string ascii = WideToASCII(wchar_cases[i]);
423    EXPECT_EQ(char_cases[i], ascii);
424  }
425
426  EXPECT_FALSE(IsStringASCII("Google \x80Video"));
427  EXPECT_FALSE(IsStringASCII(L"Google \x80Video"));
428
429  // Convert empty strings.
430  std::wstring wempty;
431  std::string empty;
432  EXPECT_EQ(empty, WideToASCII(wempty));
433  EXPECT_EQ(wempty, ASCIIToWide(empty));
434
435  // Convert strings with an embedded NUL character.
436  const char chars_with_nul[] = "test\0string";
437  const int length_with_nul = arraysize(chars_with_nul) - 1;
438  std::string string_with_nul(chars_with_nul, length_with_nul);
439  std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
440  EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),
441            wide_with_nul.length());
442  std::string narrow_with_nul = WideToASCII(wide_with_nul);
443  EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
444            narrow_with_nul.length());
445  EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
446}
447
448TEST(StringUtilTest, ToUpperASCII) {
449  EXPECT_EQ('C', ToUpperASCII('C'));
450  EXPECT_EQ('C', ToUpperASCII('c'));
451  EXPECT_EQ('2', ToUpperASCII('2'));
452
453  EXPECT_EQ(L'C', ToUpperASCII(L'C'));
454  EXPECT_EQ(L'C', ToUpperASCII(L'c'));
455  EXPECT_EQ(L'2', ToUpperASCII(L'2'));
456
457  std::string in_place_a("Cc2");
458  StringToUpperASCII(&in_place_a);
459  EXPECT_EQ("CC2", in_place_a);
460
461  std::wstring in_place_w(L"Cc2");
462  StringToUpperASCII(&in_place_w);
463  EXPECT_EQ(L"CC2", in_place_w);
464
465  std::string original_a("Cc2");
466  std::string upper_a = StringToUpperASCII(original_a);
467  EXPECT_EQ("CC2", upper_a);
468
469  std::wstring original_w(L"Cc2");
470  std::wstring upper_w = StringToUpperASCII(original_w);
471  EXPECT_EQ(L"CC2", upper_w);
472}
473
// Cases consumed by the LowerCaseEqualsASCII test: the same text as a wide
// and as a narrow string, plus the string both are compared against.
static const struct {
  const wchar_t* src_w;  // Wide input.
  const char*    src_a;  // Narrow input.
  const char*    dst;    // Lower-case ASCII string the inputs must match.
} lowercase_cases[] = {
  {L"FoO", "FoO", "foo"},
  {L"foo", "foo", "foo"},
  {L"FOO", "FOO", "foo"},
};
483
484TEST(StringUtilTest, LowerCaseEqualsASCII) {
485  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) {
486    EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w,
487                                     lowercase_cases[i].dst));
488    EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
489                                     lowercase_cases[i].dst));
490  }
491}
492
493TEST(StringUtilTest, GetByteDisplayUnits) {
494  static const struct {
495    int64 bytes;
496    DataUnits expected;
497  } cases[] = {
498    {0, DATA_UNITS_BYTE},
499    {512, DATA_UNITS_BYTE},
500    {10*1024, DATA_UNITS_KIBIBYTE},
501    {10*1024*1024, DATA_UNITS_MEBIBYTE},
502    {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE},
503    {~(1LL<<63), DATA_UNITS_GIBIBYTE},
504#ifdef NDEBUG
505    {-1, DATA_UNITS_BYTE},
506#endif
507  };
508
509  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
510    EXPECT_EQ(cases[i].expected, GetByteDisplayUnits(cases[i].bytes));
511}
512
513TEST(StringUtilTest, FormatBytes) {
514  static const struct {
515    int64 bytes;
516    DataUnits units;
517    const char* expected;
518    const char* expected_with_units;
519  } cases[] = {
520    // Expected behavior: we show one post-decimal digit when we have
521    // under two pre-decimal digits, except in cases where it makes no
522    // sense (zero or bytes).
523    // Since we switch units once we cross the 1000 mark, this keeps
524    // the display of file sizes or bytes consistently around three
525    // digits.
526    {0, DATA_UNITS_BYTE, "0", "0 B"},
527    {512, DATA_UNITS_BYTE, "512", "512 B"},
528    {512, DATA_UNITS_KIBIBYTE, "0.5", "0.5 kB"},
529    {1024*1024, DATA_UNITS_KIBIBYTE, "1024", "1024 kB"},
530    {1024*1024, DATA_UNITS_MEBIBYTE, "1.0", "1.0 MB"},
531    {1024*1024*1024, DATA_UNITS_GIBIBYTE, "1.0", "1.0 GB"},
532    {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "10.0", "10.0 GB"},
533    {99LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "99.0", "99.0 GB"},
534    {105LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "105", "105 GB"},
535    {105LL*1024*1024*1024 + 500LL*1024*1024, DATA_UNITS_GIBIBYTE,
536     "105", "105 GB"},
537    {~(1LL<<63), DATA_UNITS_GIBIBYTE, "8589934592", "8589934592 GB"},
538
539    {99*1024 + 103, DATA_UNITS_KIBIBYTE, "99.1", "99.1 kB"},
540    {1024*1024 + 103, DATA_UNITS_KIBIBYTE, "1024", "1024 kB"},
541    {1024*1024 + 205 * 1024, DATA_UNITS_MEBIBYTE, "1.2", "1.2 MB"},
542    {1024*1024*1024 + (927 * 1024*1024), DATA_UNITS_GIBIBYTE,
543     "1.9", "1.9 GB"},
544    {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "10.0", "10.0 GB"},
545    {100LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "100", "100 GB"},
546#ifdef NDEBUG
547    {-1, DATA_UNITS_BYTE, "", ""},
548#endif
549  };
550
551  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
552    EXPECT_EQ(ASCIIToUTF16(cases[i].expected),
553              FormatBytes(cases[i].bytes, cases[i].units, false));
554    EXPECT_EQ(ASCIIToUTF16(cases[i].expected_with_units),
555              FormatBytes(cases[i].bytes, cases[i].units, true));
556  }
557}
558
559TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
560  static const struct {
561    const char* str;
562    string16::size_type start_offset;
563    const char* find_this;
564    const char* replace_with;
565    const char* expected;
566  } cases[] = {
567    {"aaa", 0, "a", "b", "bbb"},
568    {"abb", 0, "ab", "a", "ab"},
569    {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
570    {"Not found", 0, "x", "0", "Not found"},
571    {"Not found again", 5, "x", "0", "Not found again"},
572    {" Making it much longer ", 0, " ", "Four score and seven years ago",
573     "Four score and seven years agoMakingFour score and seven years agoit"
574     "Four score and seven years agomuchFour score and seven years agolonger"
575     "Four score and seven years ago"},
576    {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
577    {"Replace me only me once", 9, "me ", "", "Replace me only once"},
578    {"abababab", 2, "ab", "c", "abccc"},
579  };
580
581  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
582    string16 str = ASCIIToUTF16(cases[i].str);
583    ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
584                                 ASCIIToUTF16(cases[i].find_this),
585                                 ASCIIToUTF16(cases[i].replace_with));
586    EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
587  }
588}
589
590TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
591  static const struct {
592    const char* str;
593    string16::size_type start_offset;
594    const char* find_this;
595    const char* replace_with;
596    const char* expected;
597  } cases[] = {
598    {"aaa", 0, "a", "b", "baa"},
599    {"abb", 0, "ab", "a", "ab"},
600    {"Removing some substrings inging", 0, "ing", "",
601      "Remov some substrings inging"},
602    {"Not found", 0, "x", "0", "Not found"},
603    {"Not found again", 5, "x", "0", "Not found again"},
604    {" Making it much longer ", 0, " ", "Four score and seven years ago",
605     "Four score and seven years agoMaking it much longer "},
606    {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
607    {"Replace me only me once", 4, "me ", "", "Replace only me once"},
608    {"abababab", 2, "ab", "c", "abcabab"},
609  };
610
611  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
612    string16 str = ASCIIToUTF16(cases[i].str);
613    ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
614                                     ASCIIToUTF16(cases[i].find_this),
615                                     ASCIIToUTF16(cases[i].replace_with));
616    EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
617  }
618}
619
620TEST(StringUtilTest, HexDigitToInt) {
621  EXPECT_EQ(0, HexDigitToInt('0'));
622  EXPECT_EQ(1, HexDigitToInt('1'));
623  EXPECT_EQ(2, HexDigitToInt('2'));
624  EXPECT_EQ(3, HexDigitToInt('3'));
625  EXPECT_EQ(4, HexDigitToInt('4'));
626  EXPECT_EQ(5, HexDigitToInt('5'));
627  EXPECT_EQ(6, HexDigitToInt('6'));
628  EXPECT_EQ(7, HexDigitToInt('7'));
629  EXPECT_EQ(8, HexDigitToInt('8'));
630  EXPECT_EQ(9, HexDigitToInt('9'));
631  EXPECT_EQ(10, HexDigitToInt('A'));
632  EXPECT_EQ(11, HexDigitToInt('B'));
633  EXPECT_EQ(12, HexDigitToInt('C'));
634  EXPECT_EQ(13, HexDigitToInt('D'));
635  EXPECT_EQ(14, HexDigitToInt('E'));
636  EXPECT_EQ(15, HexDigitToInt('F'));
637
638  // Verify the lower case as well.
639  EXPECT_EQ(10, HexDigitToInt('a'));
640  EXPECT_EQ(11, HexDigitToInt('b'));
641  EXPECT_EQ(12, HexDigitToInt('c'));
642  EXPECT_EQ(13, HexDigitToInt('d'));
643  EXPECT_EQ(14, HexDigitToInt('e'));
644  EXPECT_EQ(15, HexDigitToInt('f'));
645}
646
647// This checks where we can use the assignment operator for a va_list. We need
648// a way to do this since Visual C doesn't support va_copy, but assignment on
649// va_list is not guaranteed to be a copy. See StringAppendVT which uses this
650// capability.
651static void VariableArgsFunc(const char* format, ...) {
652  va_list org;
653  va_start(org, format);
654
655  va_list dup;
656  GG_VA_COPY(dup, org);
657  int i1 = va_arg(org, int);
658  int j1 = va_arg(org, int);
659  char* s1 = va_arg(org, char*);
660  double d1 = va_arg(org, double);
661  va_end(org);
662
663  int i2 = va_arg(dup, int);
664  int j2 = va_arg(dup, int);
665  char* s2 = va_arg(dup, char*);
666  double d2 = va_arg(dup, double);
667
668  EXPECT_EQ(i1, i2);
669  EXPECT_EQ(j1, j2);
670  EXPECT_STREQ(s1, s2);
671  EXPECT_EQ(d1, d2);
672
673  va_end(dup);
674}
675
676TEST(StringUtilTest, VAList) {
677  VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
678}
679
680// Test for Tokenize
681template <typename STR>
682void TokenizeTest() {
683  std::vector<STR> r;
684  size_t size;
685
686  size = Tokenize(STR("This is a string"), STR(" "), &r);
687  EXPECT_EQ(4U, size);
688  ASSERT_EQ(4U, r.size());
689  EXPECT_EQ(r[0], STR("This"));
690  EXPECT_EQ(r[1], STR("is"));
691  EXPECT_EQ(r[2], STR("a"));
692  EXPECT_EQ(r[3], STR("string"));
693  r.clear();
694
695  size = Tokenize(STR("one,two,three"), STR(","), &r);
696  EXPECT_EQ(3U, size);
697  ASSERT_EQ(3U, r.size());
698  EXPECT_EQ(r[0], STR("one"));
699  EXPECT_EQ(r[1], STR("two"));
700  EXPECT_EQ(r[2], STR("three"));
701  r.clear();
702
703  size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);
704  EXPECT_EQ(3U, size);
705  ASSERT_EQ(3U, r.size());
706  EXPECT_EQ(r[0], STR("one"));
707  EXPECT_EQ(r[1], STR("two"));
708  EXPECT_EQ(r[2], STR("three;four"));
709  r.clear();
710
711  size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);
712  EXPECT_EQ(4U, size);
713  ASSERT_EQ(4U, r.size());
714  EXPECT_EQ(r[0], STR("one"));
715  EXPECT_EQ(r[1], STR("two"));
716  EXPECT_EQ(r[2], STR("three"));
717  EXPECT_EQ(r[3], STR("four"));
718  r.clear();
719
720  size = Tokenize(STR("one, two, three"), STR(","), &r);
721  EXPECT_EQ(3U, size);
722  ASSERT_EQ(3U, r.size());
723  EXPECT_EQ(r[0], STR("one"));
724  EXPECT_EQ(r[1], STR(" two"));
725  EXPECT_EQ(r[2], STR(" three"));
726  r.clear();
727
728  size = Tokenize(STR("one, two, three, "), STR(","), &r);
729  EXPECT_EQ(4U, size);
730  ASSERT_EQ(4U, r.size());
731  EXPECT_EQ(r[0], STR("one"));
732  EXPECT_EQ(r[1], STR(" two"));
733  EXPECT_EQ(r[2], STR(" three"));
734  EXPECT_EQ(r[3], STR(" "));
735  r.clear();
736
737  size = Tokenize(STR("one, two, three,"), STR(","), &r);
738  EXPECT_EQ(3U, size);
739  ASSERT_EQ(3U, r.size());
740  EXPECT_EQ(r[0], STR("one"));
741  EXPECT_EQ(r[1], STR(" two"));
742  EXPECT_EQ(r[2], STR(" three"));
743  r.clear();
744
745  size = Tokenize(STR(""), STR(","), &r);
746  EXPECT_EQ(0U, size);
747  ASSERT_EQ(0U, r.size());
748  r.clear();
749
750  size = Tokenize(STR(","), STR(","), &r);
751  EXPECT_EQ(0U, size);
752  ASSERT_EQ(0U, r.size());
753  r.clear();
754
755  size = Tokenize(STR(",;:."), STR(".:;,"), &r);
756  EXPECT_EQ(0U, size);
757  ASSERT_EQ(0U, r.size());
758  r.clear();
759
760  size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);
761  EXPECT_EQ(1U, size);
762  ASSERT_EQ(1U, r.size());
763  EXPECT_EQ(r[0], STR("a"));
764  r.clear();
765
766  size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);
767  EXPECT_EQ(2U, size);
768  ASSERT_EQ(2U, r.size());
769  EXPECT_EQ(r[0], STR("\ta\t"));
770  EXPECT_EQ(r[1], STR("b\tcc"));
771  r.clear();
772}
773
// Runs the shared Tokenize checks with std::string as the string type.
TEST(StringUtilTest, TokenizeStdString) {
  TokenizeTest<std::string>();
}
777
// Runs the shared Tokenize checks with base::StringPiece as the string type.
TEST(StringUtilTest, TokenizeStringPiece) {
  TokenizeTest<base::StringPiece>();
}
781
782// Test for JoinString
783TEST(StringUtilTest, JoinString) {
784  std::vector<std::string> in;
785  EXPECT_EQ("", JoinString(in, ','));
786
787  in.push_back("a");
788  EXPECT_EQ("a", JoinString(in, ','));
789
790  in.push_back("b");
791  in.push_back("c");
792  EXPECT_EQ("a,b,c", JoinString(in, ','));
793
794  in.push_back("");
795  EXPECT_EQ("a,b,c,", JoinString(in, ','));
796  in.push_back(" ");
797  EXPECT_EQ("a|b|c|| ", JoinString(in, '|'));
798}
799
800TEST(StringUtilTest, StartsWith) {
801  EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
802  EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
803  EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
804  EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
805  EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
806  EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
807  EXPECT_FALSE(StartsWithASCII("", "javascript", false));
808  EXPECT_FALSE(StartsWithASCII("", "javascript", true));
809  EXPECT_TRUE(StartsWithASCII("java", "", false));
810  EXPECT_TRUE(StartsWithASCII("java", "", true));
811
812  EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", true));
813  EXPECT_FALSE(StartsWith(L"JavaScript:url", L"javascript", true));
814  EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", false));
815  EXPECT_TRUE(StartsWith(L"JavaScript:url", L"javascript", false));
816  EXPECT_FALSE(StartsWith(L"java", L"javascript", true));
817  EXPECT_FALSE(StartsWith(L"java", L"javascript", false));
818  EXPECT_FALSE(StartsWith(L"", L"javascript", false));
819  EXPECT_FALSE(StartsWith(L"", L"javascript", true));
820  EXPECT_TRUE(StartsWith(L"java", L"", false));
821  EXPECT_TRUE(StartsWith(L"java", L"", true));
822}
823
824TEST(StringUtilTest, EndsWith) {
825  EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", true));
826  EXPECT_FALSE(EndsWith(L"Foo.Plugin", L".plugin", true));
827  EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", false));
828  EXPECT_TRUE(EndsWith(L"Foo.Plugin", L".plugin", false));
829  EXPECT_FALSE(EndsWith(L".plug", L".plugin", true));
830  EXPECT_FALSE(EndsWith(L".plug", L".plugin", false));
831  EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", true));
832  EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", false));
833  EXPECT_FALSE(EndsWith(L"", L".plugin", false));
834  EXPECT_FALSE(EndsWith(L"", L".plugin", true));
835  EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", false));
836  EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", true));
837  EXPECT_TRUE(EndsWith(L".plugin", L".plugin", false));
838  EXPECT_TRUE(EndsWith(L".plugin", L".plugin", true));
839  EXPECT_TRUE(EndsWith(L"", L"", false));
840  EXPECT_TRUE(EndsWith(L"", L"", true));
841}
842
843TEST(StringUtilTest, GetStringFWithOffsets) {
844  std::vector<string16> subst;
845  subst.push_back(ASCIIToUTF16("1"));
846  subst.push_back(ASCIIToUTF16("2"));
847  std::vector<size_t> offsets;
848
849  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
850                            subst,
851                            &offsets);
852  EXPECT_EQ(2U, offsets.size());
853  EXPECT_EQ(7U, offsets[0]);
854  EXPECT_EQ(25U, offsets[1]);
855  offsets.clear();
856
857  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
858                            subst,
859                            &offsets);
860  EXPECT_EQ(2U, offsets.size());
861  EXPECT_EQ(25U, offsets[0]);
862  EXPECT_EQ(7U, offsets[1]);
863  offsets.clear();
864}
865
866TEST(StringUtilTest, ReplaceStringPlaceholders) {
867  std::vector<string16> subst;
868  subst.push_back(ASCIIToUTF16("9a"));
869  subst.push_back(ASCIIToUTF16("8b"));
870  subst.push_back(ASCIIToUTF16("7c"));
871  subst.push_back(ASCIIToUTF16("6d"));
872  subst.push_back(ASCIIToUTF16("5e"));
873  subst.push_back(ASCIIToUTF16("4f"));
874  subst.push_back(ASCIIToUTF16("3g"));
875  subst.push_back(ASCIIToUTF16("2h"));
876  subst.push_back(ASCIIToUTF16("1i"));
877
878  string16 formatted =
879      ReplaceStringPlaceholders(
880          ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);
881
882  EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
883}
884
885TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
886  // Test whether replacestringplaceholders works as expected when there
887  // are fewer inputs than outputs.
888  std::vector<string16> subst;
889  subst.push_back(ASCIIToUTF16("9a"));
890  subst.push_back(ASCIIToUTF16("8b"));
891  subst.push_back(ASCIIToUTF16("7c"));
892
893  string16 formatted =
894      ReplaceStringPlaceholders(
895          ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);
896
897  EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
898}
899
900TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
901  std::vector<std::string> subst;
902  subst.push_back("9a");
903  subst.push_back("8b");
904  subst.push_back("7c");
905  subst.push_back("6d");
906  subst.push_back("5e");
907  subst.push_back("4f");
908  subst.push_back("3g");
909  subst.push_back("2h");
910  subst.push_back("1i");
911
912  std::string formatted =
913      ReplaceStringPlaceholders(
914          "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);
915
916  EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
917}
918
919TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
920  std::vector<std::string> subst;
921  subst.push_back("a");
922  subst.push_back("b");
923  subst.push_back("c");
924  EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL),
925            "$1 $$2 $$$3");
926}
927
928TEST(StringUtilTest, MatchPatternTest) {
929  EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
930  EXPECT_TRUE(MatchPattern("www.google.com", "*"));
931  EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
932  EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
933  EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
934  EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
935  EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
936  EXPECT_FALSE(MatchPattern("", "*.*"));
937  EXPECT_TRUE(MatchPattern("", "*"));
938  EXPECT_TRUE(MatchPattern("", "?"));
939  EXPECT_TRUE(MatchPattern("", ""));
940  EXPECT_FALSE(MatchPattern("Hello", ""));
941  EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
942  // Stop after a certain recursion depth.
943  EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
944
945  // Test UTF8 matching.
946  EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
947  EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
948  EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
949  // Invalid sequences should be handled as a single invalid character.
950  EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
951  // If the pattern has invalid characters, it shouldn't match anything.
952  EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
953
954  // Test UTF16 character matching.
955  EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
956                           UTF8ToUTF16("*.com")));
957  EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
958                           UTF8ToUTF16("He??o\\*1*")));
959
960  // This test verifies that consecutive wild cards are collapsed into 1
961  // wildcard (when this doesn't occur, MatchPattern reaches it's maximum
962  // recursion depth).
963  EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
964                           UTF8ToUTF16("He********************************o")));
965}
966
967TEST(StringUtilTest, LcpyTest) {
968  // Test the normal case where we fit in our buffer.
969  {
970    char dst[10];
971    wchar_t wdst[10];
972    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
973    EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
974    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
975    EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
976  }
977
978  // Test dst_size == 0, nothing should be written to |dst| and we should
979  // have the equivalent of strlen(src).
980  {
981    char dst[2] = {1, 2};
982    wchar_t wdst[2] = {1, 2};
983    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0));
984    EXPECT_EQ(1, dst[0]);
985    EXPECT_EQ(2, dst[1]);
986    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0));
987#if defined(WCHAR_T_IS_UNSIGNED)
988    EXPECT_EQ(1U, wdst[0]);
989    EXPECT_EQ(2U, wdst[1]);
990#else
991    EXPECT_EQ(1, wdst[0]);
992    EXPECT_EQ(2, wdst[1]);
993#endif
994  }
995
996  // Test the case were we _just_ competely fit including the null.
997  {
998    char dst[8];
999    wchar_t wdst[8];
1000    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1001    EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1002    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1003    EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1004  }
1005
1006  // Test the case were we we are one smaller, so we can't fit the null.
1007  {
1008    char dst[7];
1009    wchar_t wdst[7];
1010    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1011    EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
1012    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1013    EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
1014  }
1015
1016  // Test the case were we are just too small.
1017  {
1018    char dst[3];
1019    wchar_t wdst[3];
1020    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1021    EXPECT_EQ(0, memcmp(dst, "ab", 3));
1022    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1023    EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
1024  }
1025}
1026
1027TEST(StringUtilTest, WprintfFormatPortabilityTest) {
1028  struct TestData {
1029    const wchar_t* input;
1030    bool portable;
1031  } cases[] = {
1032    { L"%ls", true },
1033    { L"%s", false },
1034    { L"%S", false },
1035    { L"%lS", false },
1036    { L"Hello, %s", false },
1037    { L"%lc", true },
1038    { L"%c", false },
1039    { L"%C", false },
1040    { L"%lC", false },
1041    { L"%ls %s", false },
1042    { L"%s %ls", false },
1043    { L"%s %ls %s", false },
1044    { L"%f", true },
1045    { L"%f %F", false },
1046    { L"%d %D", false },
1047    { L"%o %O", false },
1048    { L"%u %U", false },
1049    { L"%f %d %o %u", true },
1050    { L"%-8d (%02.1f%)", true },
1051    { L"% 10s", false },
1052    { L"% 10ls", true }
1053  };
1054  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
1055    EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input));
1056  }
1057}
1058
1059TEST(StringUtilTest, RemoveChars) {
1060  const char* kRemoveChars = "-/+*";
1061  std::string input = "A-+bc/d!*";
1062  EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));
1063  EXPECT_EQ("Abcd!", input);
1064
1065  // No characters match kRemoveChars.
1066  EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1067  EXPECT_EQ("Abcd!", input);
1068
1069  // Empty string.
1070  input.clear();
1071  EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1072  EXPECT_EQ(std::string(), input);
1073}
1074
1075TEST(StringUtilTest, ContainsOnlyChars) {
1076  // Providing an empty list of characters should return false but for the empty
1077  // string.
1078  EXPECT_TRUE(ContainsOnlyChars("", ""));
1079  EXPECT_FALSE(ContainsOnlyChars("Hello", ""));
1080
1081  EXPECT_TRUE(ContainsOnlyChars("", "1234"));
1082  EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
1083  EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
1084  EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
1085  EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
1086}
1087
1088}  // namespace base
1089