1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/strings/string_util.h"
6
7#include <math.h>
8#include <stdarg.h>
9
10#include <limits>
11#include <sstream>
12
13#include "base/basictypes.h"
14#include "base/strings/string16.h"
15#include "base/strings/utf_string_conversions.h"
16#include "testing/gmock/include/gmock/gmock.h"
17#include "testing/gtest/include/gtest/gtest.h"
18
19using ::testing::ElementsAre;
20
21namespace base {
22
23static const struct trim_case {
24  const wchar_t* input;
25  const TrimPositions positions;
26  const wchar_t* output;
27  const TrimPositions return_value;
28} trim_cases[] = {
29  {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
30  {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
31  {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
32  {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
33  {L"", TRIM_ALL, L"", TRIM_NONE},
34  {L"  ", TRIM_LEADING, L"", TRIM_LEADING},
35  {L"  ", TRIM_TRAILING, L"", TRIM_TRAILING},
36  {L"  ", TRIM_ALL, L"", TRIM_ALL},
37  {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
38  {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
39};
40
41static const struct trim_case_ascii {
42  const char* input;
43  const TrimPositions positions;
44  const char* output;
45  const TrimPositions return_value;
46} trim_cases_ascii[] = {
47  {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
48  {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
49  {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
50  {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
51  {"", TRIM_ALL, "", TRIM_NONE},
52  {"  ", TRIM_LEADING, "", TRIM_LEADING},
53  {"  ", TRIM_TRAILING, "", TRIM_TRAILING},
54  {"  ", TRIM_ALL, "", TRIM_ALL},
55  {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
56};
57
58namespace {
59
60// Helper used to test TruncateUTF8ToByteSize.
61bool Truncated(const std::string& input, const size_t byte_size,
62               std::string* output) {
63    size_t prev = input.length();
64    TruncateUTF8ToByteSize(input, byte_size, output);
65    return prev != output->length();
66}
67
68}  // namespace
69
70TEST(StringUtilTest, TruncateUTF8ToByteSize) {
71  std::string output;
72
73  // Empty strings and invalid byte_size arguments
74  EXPECT_FALSE(Truncated(std::string(), 0, &output));
75  EXPECT_EQ(output, "");
76  EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
77  EXPECT_EQ(output, "");
78  EXPECT_FALSE(Truncated("\xe1\x80\xbf", -1, &output));
79  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
80
81  // Testing the truncation of valid UTF8 correctly
82  EXPECT_TRUE(Truncated("abc", 2, &output));
83  EXPECT_EQ(output, "ab");
84  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
85  EXPECT_EQ(output.compare("\xc2\x81"), 0);
86  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
87  EXPECT_EQ(output.compare("\xc2\x81"), 0);
88  EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
89  EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
90
91  {
92    const char array[] = "\x00\x00\xc2\x81\xc2\x81";
93    const std::string array_string(array, arraysize(array));
94    EXPECT_TRUE(Truncated(array_string, 4, &output));
95    EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
96  }
97
98  {
99    const char array[] = "\x00\xc2\x81\xc2\x81";
100    const std::string array_string(array, arraysize(array));
101    EXPECT_TRUE(Truncated(array_string, 4, &output));
102    EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
103  }
104
105  // Testing invalid UTF8
106  EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
107  EXPECT_EQ(output.compare(""), 0);
108  EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
109  EXPECT_EQ(output.compare(""), 0);
110  EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
111  EXPECT_EQ(output.compare(""), 0);
112
113  // Testing invalid UTF8 mixed with valid UTF8
114  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
115  EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
116  EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
117  EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
118  EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
119              10, &output));
120  EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
121  EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
122              10, &output));
123  EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
124  EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
125  EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
126
127  // Overlong sequences
128  EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
129  EXPECT_EQ(output.compare(""), 0);
130  EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
131  EXPECT_EQ(output.compare(""), 0);
132  EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
133  EXPECT_EQ(output.compare(""), 0);
134  EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
135  EXPECT_EQ(output.compare(""), 0);
136  EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
137  EXPECT_EQ(output.compare(""), 0);
138  EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
139  EXPECT_EQ(output.compare(""), 0);
140  EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
141  EXPECT_EQ(output.compare(""), 0);
142  EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
143  EXPECT_EQ(output.compare(""), 0);
144  EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
145  EXPECT_EQ(output.compare(""), 0);
146  EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
147  EXPECT_EQ(output.compare(""), 0);
148  EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
149  EXPECT_EQ(output.compare(""), 0);
150
151  // Beyond U+10FFFF (the upper limit of Unicode codespace)
152  EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
153  EXPECT_EQ(output.compare(""), 0);
154  EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
155  EXPECT_EQ(output.compare(""), 0);
156  EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
157  EXPECT_EQ(output.compare(""), 0);
158
159  // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
160  EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
161  EXPECT_EQ(output.compare(""), 0);
162  EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
163  EXPECT_EQ(output.compare(""), 0);
164
165  {
166    const char array[] = "\x00\x00\xfe\xff";
167    const std::string array_string(array, arraysize(array));
168    EXPECT_TRUE(Truncated(array_string, 4, &output));
169    EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
170  }
171
172  // Variants on the previous test
173  {
174    const char array[] = "\xff\xfe\x00\x00";
175    const std::string array_string(array, 4);
176    EXPECT_FALSE(Truncated(array_string, 4, &output));
177    EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
178  }
179  {
180    const char array[] = "\xff\x00\x00\xfe";
181    const std::string array_string(array, arraysize(array));
182    EXPECT_TRUE(Truncated(array_string, 4, &output));
183    EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
184  }
185
186  // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
187  EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
188  EXPECT_EQ(output.compare(""), 0);
189  EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
190  EXPECT_EQ(output.compare(""), 0);
191  EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
192  EXPECT_EQ(output.compare(""), 0);
193  EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
194  EXPECT_EQ(output.compare(""), 0);
195  EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
196  EXPECT_EQ(output.compare(""), 0);
197
198  // Strings in legacy encodings that are valid in UTF-8, but
199  // are invalid as UTF-8 in real data.
200  EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
201  EXPECT_EQ(output.compare("caf"), 0);
202  EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
203  EXPECT_EQ(output.compare(""), 0);
204  EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
205  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
206  EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
207              &output));
208  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
209
210  // Testing using the same string as input and output.
211  EXPECT_FALSE(Truncated(output, 4, &output));
212  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
213  EXPECT_TRUE(Truncated(output, 3, &output));
214  EXPECT_EQ(output.compare("\xa7\x41"), 0);
215
216  // "abc" with U+201[CD] in windows-125[0-8]
217  EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
218  EXPECT_EQ(output.compare("\x93" "abc"), 0);
219
220  // U+0639 U+064E U+0644 U+064E in ISO-8859-6
221  EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
222  EXPECT_EQ(output.compare(""), 0);
223
224  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
225  EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
226  EXPECT_EQ(output.compare(""), 0);
227}
228
229TEST(StringUtilTest, TrimWhitespace) {
230  string16 output;  // Allow contents to carry over to next testcase
231  for (size_t i = 0; i < arraysize(trim_cases); ++i) {
232    const trim_case& value = trim_cases[i];
233    EXPECT_EQ(value.return_value,
234              TrimWhitespace(WideToUTF16(value.input), value.positions,
235                             &output));
236    EXPECT_EQ(WideToUTF16(value.output), output);
237  }
238
239  // Test that TrimWhitespace() can take the same string for input and output
240  output = ASCIIToUTF16("  This is a test \r\n");
241  EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
242  EXPECT_EQ(ASCIIToUTF16("This is a test"), output);
243
244  // Once more, but with a string of whitespace
245  output = ASCIIToUTF16("  \r\n");
246  EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
247  EXPECT_EQ(string16(), output);
248
249  std::string output_ascii;
250  for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
251    const trim_case_ascii& value = trim_cases_ascii[i];
252    EXPECT_EQ(value.return_value,
253              TrimWhitespace(value.input, value.positions, &output_ascii));
254    EXPECT_EQ(value.output, output_ascii);
255  }
256}
257
// One CollapseWhitespace() scenario expressed with wide-character literals.
struct collapse_case {
  // Text handed to CollapseWhitespace().
  const wchar_t* input;
  // Flag passed as the second argument of CollapseWhitespace(). Judging by
  // the rows below, whitespace runs containing a CR or LF are expected to
  // vanish entirely when it is set -- NOTE(review): confirm against the
  // function's declaration.
  const bool trim;
  // Expected result.
  const wchar_t* output;
};

// Wide-character scenarios consumed by the CollapseWhitespace test.
static const collapse_case collapse_cases[] = {
  // Flag cleared.
  {L" Google Video ", false, L"Google Video"},
  {L"Google Video", false, L"Google Video"},
  {L"", false, L""},
  {L"  ", false, L""},
  {L"\t\rTest String\n", false, L"Test String"},
  {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
  {L"    Test     \n  \t String    ", false, L"Test String"},
  {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
  {L"   Test String", false, L"Test String"},
  {L"Test String    ", false, L"Test String"},
  {L"Test String", false, L"Test String"},
  // Flag set.
  {L"", true, L""},
  {L"\n", true, L""},
  {L"  \r  ", true, L""},
  {L"\nFoo", true, L"Foo"},
  {L"\r  Foo  ", true, L"Foo"},
  {L" Foo bar ", true, L"Foo bar"},
  {L"  \tFoo  bar  \n", true, L"Foo bar"},
  {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
283
284TEST(StringUtilTest, CollapseWhitespace) {
285  for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
286    const collapse_case& value = collapse_cases[i];
287    EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim));
288  }
289}
290
// One CollapseWhitespaceASCII() scenario expressed with narrow literals.
struct collapse_case_ascii {
  // Text handed to CollapseWhitespaceASCII().
  const char* input;
  // Flag passed as the second argument of CollapseWhitespaceASCII(). Judging
  // by the rows below, whitespace runs containing a CR or LF are expected to
  // vanish entirely when it is set -- NOTE(review): confirm against the
  // function's declaration.
  const bool trim;
  // Expected result.
  const char* output;
};

// Narrow-character scenarios consumed by the CollapseWhitespaceASCII test.
static const collapse_case_ascii collapse_cases_ascii[] = {
  // Flag cleared.
  {" Google Video ", false, "Google Video"},
  {"Google Video", false, "Google Video"},
  {"", false, ""},
  {"  ", false, ""},
  {"\t\rTest String\n", false, "Test String"},
  {"    Test     \n  \t String    ", false, "Test String"},
  {"   Test String", false, "Test String"},
  {"Test String    ", false, "Test String"},
  {"Test String", false, "Test String"},
  // Flag set.
  {"", true, ""},
  {"\n", true, ""},
  {"  \r  ", true, ""},
  {"\nFoo", true, "Foo"},
  {"\r  Foo  ", true, "Foo"},
  {" Foo bar ", true, "Foo bar"},
  {"  \tFoo  bar  \n", true, "Foo bar"},
  {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
};
314
315TEST(StringUtilTest, CollapseWhitespaceASCII) {
316  for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
317    const collapse_case_ascii& value = collapse_cases_ascii[i];
318    EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
319  }
320}
321
322TEST(StringUtilTest, ContainsOnlyWhitespaceASCII) {
323  EXPECT_TRUE(ContainsOnlyWhitespaceASCII(std::string()));
324  EXPECT_TRUE(ContainsOnlyWhitespaceASCII(" "));
325  EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t"));
326  EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t \r \n  "));
327  EXPECT_FALSE(ContainsOnlyWhitespaceASCII("a"));
328  EXPECT_FALSE(ContainsOnlyWhitespaceASCII("\thello\r \n  "));
329}
330
331TEST(StringUtilTest, ContainsOnlyWhitespace) {
332  EXPECT_TRUE(ContainsOnlyWhitespace(string16()));
333  EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(" ")));
334  EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t")));
335  EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t \r \n  ")));
336  EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("a")));
337  EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("\thello\r \n  ")));
338}
339
340TEST(StringUtilTest, IsStringUTF8) {
341  EXPECT_TRUE(IsStringUTF8("abc"));
342  EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
343  EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
344  EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
345  EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
346  EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM
347
348  // surrogate code points
349  EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
350  EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
351  EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
352
353  // overlong sequences
354  EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
355  EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "AB"
356  EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
357  EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
358  EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
359  EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
360  EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
361  EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
362  EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
363  EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
364  EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+00A5
365
366  // Beyond U+10FFFF (the upper limit of Unicode codespace)
367  EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
368  EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
369  EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes
370
371  // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
372  EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
373  EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
374  EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
375  EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
376
377  // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
378  EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE)
379  EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));  // U+1FFFE
380  EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+10FFFF
381  EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
382  EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF
383  // Strings in legacy encodings. We can certainly make up strings
384  // in a legacy encoding that are valid in UTF-8, but in real data,
385  // most of them are invalid as UTF-8.
386  EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
387  EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC001 in EUC-KR
388  EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
389  // "abc" with U+201[CD] in windows-125[0-8]
390  EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
391  // U+0639 U+064E U+0644 U+064E in ISO-8859-6
392  EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
393  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
394  EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
395
396  // Check that we support Embedded Nulls. The first uses the canonical UTF-8
397  // representation, and the second uses a 2-byte sequence. The second version
398  // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
399  // given codepoint must be used.
400  static const char kEmbeddedNull[] = "embedded\0null";
401  EXPECT_TRUE(IsStringUTF8(
402      std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
403  EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
404}
405
406TEST(StringUtilTest, ConvertASCII) {
407  static const char* char_cases[] = {
408    "Google Video",
409    "Hello, world\n",
410    "0123ABCDwxyz \a\b\t\r\n!+,.~"
411  };
412
413  static const wchar_t* const wchar_cases[] = {
414    L"Google Video",
415    L"Hello, world\n",
416    L"0123ABCDwxyz \a\b\t\r\n!+,.~"
417  };
418
419  for (size_t i = 0; i < arraysize(char_cases); ++i) {
420    EXPECT_TRUE(IsStringASCII(char_cases[i]));
421    std::wstring wide = ASCIIToWide(char_cases[i]);
422    EXPECT_EQ(wchar_cases[i], wide);
423
424    EXPECT_TRUE(IsStringASCII(wchar_cases[i]));
425    std::string ascii = WideToASCII(wchar_cases[i]);
426    EXPECT_EQ(char_cases[i], ascii);
427  }
428
429  EXPECT_FALSE(IsStringASCII("Google \x80Video"));
430  EXPECT_FALSE(IsStringASCII(L"Google \x80Video"));
431
432  // Convert empty strings.
433  std::wstring wempty;
434  std::string empty;
435  EXPECT_EQ(empty, WideToASCII(wempty));
436  EXPECT_EQ(wempty, ASCIIToWide(empty));
437
438  // Convert strings with an embedded NUL character.
439  const char chars_with_nul[] = "test\0string";
440  const int length_with_nul = arraysize(chars_with_nul) - 1;
441  std::string string_with_nul(chars_with_nul, length_with_nul);
442  std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
443  EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),
444            wide_with_nul.length());
445  std::string narrow_with_nul = WideToASCII(wide_with_nul);
446  EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
447            narrow_with_nul.length());
448  EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
449}
450
451TEST(StringUtilTest, ToUpperASCII) {
452  EXPECT_EQ('C', ToUpperASCII('C'));
453  EXPECT_EQ('C', ToUpperASCII('c'));
454  EXPECT_EQ('2', ToUpperASCII('2'));
455
456  EXPECT_EQ(L'C', ToUpperASCII(L'C'));
457  EXPECT_EQ(L'C', ToUpperASCII(L'c'));
458  EXPECT_EQ(L'2', ToUpperASCII(L'2'));
459
460  std::string in_place_a("Cc2");
461  StringToUpperASCII(&in_place_a);
462  EXPECT_EQ("CC2", in_place_a);
463
464  std::wstring in_place_w(L"Cc2");
465  StringToUpperASCII(&in_place_w);
466  EXPECT_EQ(L"CC2", in_place_w);
467
468  std::string original_a("Cc2");
469  std::string upper_a = StringToUpperASCII(original_a);
470  EXPECT_EQ("CC2", upper_a);
471
472  std::wstring original_w(L"Cc2");
473  std::wstring upper_w = StringToUpperASCII(original_w);
474  EXPECT_EQ(L"CC2", upper_w);
475}
476
477TEST(StringUtilTest, LowerCaseEqualsASCII) {
478  static const struct {
479    const wchar_t* src_w;
480    const char*    src_a;
481    const char*    dst;
482  } lowercase_cases[] = {
483    { L"FoO", "FoO", "foo" },
484    { L"foo", "foo", "foo" },
485    { L"FOO", "FOO", "foo" },
486  };
487
488  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) {
489    EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w,
490                                     lowercase_cases[i].dst));
491    EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
492                                     lowercase_cases[i].dst));
493  }
494}
495
496TEST(StringUtilTest, FormatBytesUnlocalized) {
497  static const struct {
498    int64 bytes;
499    const char* expected;
500  } cases[] = {
501    // Expected behavior: we show one post-decimal digit when we have
502    // under two pre-decimal digits, except in cases where it makes no
503    // sense (zero or bytes).
504    // Since we switch units once we cross the 1000 mark, this keeps
505    // the display of file sizes or bytes consistently around three
506    // digits.
507    {0, "0 B"},
508    {512, "512 B"},
509    {1024*1024, "1.0 MB"},
510    {1024*1024*1024, "1.0 GB"},
511    {10LL*1024*1024*1024, "10.0 GB"},
512    {99LL*1024*1024*1024, "99.0 GB"},
513    {105LL*1024*1024*1024, "105 GB"},
514    {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
515    {~(1LL<<63), "8192 PB"},
516
517    {99*1024 + 103, "99.1 kB"},
518    {1024*1024 + 103, "1.0 MB"},
519    {1024*1024 + 205 * 1024, "1.2 MB"},
520    {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
521    {10LL*1024*1024*1024, "10.0 GB"},
522    {100LL*1024*1024*1024, "100 GB"},
523  };
524
525  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
526    EXPECT_EQ(ASCIIToUTF16(cases[i].expected),
527              FormatBytesUnlocalized(cases[i].bytes));
528  }
529}
530TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
531  static const struct {
532    const char* str;
533    string16::size_type start_offset;
534    const char* find_this;
535    const char* replace_with;
536    const char* expected;
537  } cases[] = {
538    {"aaa", 0, "a", "b", "bbb"},
539    {"abb", 0, "ab", "a", "ab"},
540    {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
541    {"Not found", 0, "x", "0", "Not found"},
542    {"Not found again", 5, "x", "0", "Not found again"},
543    {" Making it much longer ", 0, " ", "Four score and seven years ago",
544     "Four score and seven years agoMakingFour score and seven years agoit"
545     "Four score and seven years agomuchFour score and seven years agolonger"
546     "Four score and seven years ago"},
547    {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
548    {"Replace me only me once", 9, "me ", "", "Replace me only once"},
549    {"abababab", 2, "ab", "c", "abccc"},
550  };
551
552  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
553    string16 str = ASCIIToUTF16(cases[i].str);
554    ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
555                                 ASCIIToUTF16(cases[i].find_this),
556                                 ASCIIToUTF16(cases[i].replace_with));
557    EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
558  }
559}
560
561TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
562  static const struct {
563    const char* str;
564    string16::size_type start_offset;
565    const char* find_this;
566    const char* replace_with;
567    const char* expected;
568  } cases[] = {
569    {"aaa", 0, "a", "b", "baa"},
570    {"abb", 0, "ab", "a", "ab"},
571    {"Removing some substrings inging", 0, "ing", "",
572      "Remov some substrings inging"},
573    {"Not found", 0, "x", "0", "Not found"},
574    {"Not found again", 5, "x", "0", "Not found again"},
575    {" Making it much longer ", 0, " ", "Four score and seven years ago",
576     "Four score and seven years agoMaking it much longer "},
577    {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
578    {"Replace me only me once", 4, "me ", "", "Replace only me once"},
579    {"abababab", 2, "ab", "c", "abcabab"},
580  };
581
582  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
583    string16 str = ASCIIToUTF16(cases[i].str);
584    ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
585                                     ASCIIToUTF16(cases[i].find_this),
586                                     ASCIIToUTF16(cases[i].replace_with));
587    EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
588  }
589}
590
591TEST(StringUtilTest, HexDigitToInt) {
592  EXPECT_EQ(0, HexDigitToInt('0'));
593  EXPECT_EQ(1, HexDigitToInt('1'));
594  EXPECT_EQ(2, HexDigitToInt('2'));
595  EXPECT_EQ(3, HexDigitToInt('3'));
596  EXPECT_EQ(4, HexDigitToInt('4'));
597  EXPECT_EQ(5, HexDigitToInt('5'));
598  EXPECT_EQ(6, HexDigitToInt('6'));
599  EXPECT_EQ(7, HexDigitToInt('7'));
600  EXPECT_EQ(8, HexDigitToInt('8'));
601  EXPECT_EQ(9, HexDigitToInt('9'));
602  EXPECT_EQ(10, HexDigitToInt('A'));
603  EXPECT_EQ(11, HexDigitToInt('B'));
604  EXPECT_EQ(12, HexDigitToInt('C'));
605  EXPECT_EQ(13, HexDigitToInt('D'));
606  EXPECT_EQ(14, HexDigitToInt('E'));
607  EXPECT_EQ(15, HexDigitToInt('F'));
608
609  // Verify the lower case as well.
610  EXPECT_EQ(10, HexDigitToInt('a'));
611  EXPECT_EQ(11, HexDigitToInt('b'));
612  EXPECT_EQ(12, HexDigitToInt('c'));
613  EXPECT_EQ(13, HexDigitToInt('d'));
614  EXPECT_EQ(14, HexDigitToInt('e'));
615  EXPECT_EQ(15, HexDigitToInt('f'));
616}
617
618// This checks where we can use the assignment operator for a va_list. We need
619// a way to do this since Visual C doesn't support va_copy, but assignment on
620// va_list is not guaranteed to be a copy. See StringAppendVT which uses this
621// capability.
622static void VariableArgsFunc(const char* format, ...) {
623  va_list org;
624  va_start(org, format);
625
626  va_list dup;
627  GG_VA_COPY(dup, org);
628  int i1 = va_arg(org, int);
629  int j1 = va_arg(org, int);
630  char* s1 = va_arg(org, char*);
631  double d1 = va_arg(org, double);
632  va_end(org);
633
634  int i2 = va_arg(dup, int);
635  int j2 = va_arg(dup, int);
636  char* s2 = va_arg(dup, char*);
637  double d2 = va_arg(dup, double);
638
639  EXPECT_EQ(i1, i2);
640  EXPECT_EQ(j1, j2);
641  EXPECT_STREQ(s1, s2);
642  EXPECT_EQ(d1, d2);
643
644  va_end(dup);
645}
646
647TEST(StringUtilTest, VAList) {
648  VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
649}
650
651// Test for Tokenize
652template <typename STR>
653void TokenizeTest() {
654  std::vector<STR> r;
655  size_t size;
656
657  size = Tokenize(STR("This is a string"), STR(" "), &r);
658  EXPECT_EQ(4U, size);
659  ASSERT_EQ(4U, r.size());
660  EXPECT_EQ(r[0], STR("This"));
661  EXPECT_EQ(r[1], STR("is"));
662  EXPECT_EQ(r[2], STR("a"));
663  EXPECT_EQ(r[3], STR("string"));
664  r.clear();
665
666  size = Tokenize(STR("one,two,three"), STR(","), &r);
667  EXPECT_EQ(3U, size);
668  ASSERT_EQ(3U, r.size());
669  EXPECT_EQ(r[0], STR("one"));
670  EXPECT_EQ(r[1], STR("two"));
671  EXPECT_EQ(r[2], STR("three"));
672  r.clear();
673
674  size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);
675  EXPECT_EQ(3U, size);
676  ASSERT_EQ(3U, r.size());
677  EXPECT_EQ(r[0], STR("one"));
678  EXPECT_EQ(r[1], STR("two"));
679  EXPECT_EQ(r[2], STR("three;four"));
680  r.clear();
681
682  size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);
683  EXPECT_EQ(4U, size);
684  ASSERT_EQ(4U, r.size());
685  EXPECT_EQ(r[0], STR("one"));
686  EXPECT_EQ(r[1], STR("two"));
687  EXPECT_EQ(r[2], STR("three"));
688  EXPECT_EQ(r[3], STR("four"));
689  r.clear();
690
691  size = Tokenize(STR("one, two, three"), STR(","), &r);
692  EXPECT_EQ(3U, size);
693  ASSERT_EQ(3U, r.size());
694  EXPECT_EQ(r[0], STR("one"));
695  EXPECT_EQ(r[1], STR(" two"));
696  EXPECT_EQ(r[2], STR(" three"));
697  r.clear();
698
699  size = Tokenize(STR("one, two, three, "), STR(","), &r);
700  EXPECT_EQ(4U, size);
701  ASSERT_EQ(4U, r.size());
702  EXPECT_EQ(r[0], STR("one"));
703  EXPECT_EQ(r[1], STR(" two"));
704  EXPECT_EQ(r[2], STR(" three"));
705  EXPECT_EQ(r[3], STR(" "));
706  r.clear();
707
708  size = Tokenize(STR("one, two, three,"), STR(","), &r);
709  EXPECT_EQ(3U, size);
710  ASSERT_EQ(3U, r.size());
711  EXPECT_EQ(r[0], STR("one"));
712  EXPECT_EQ(r[1], STR(" two"));
713  EXPECT_EQ(r[2], STR(" three"));
714  r.clear();
715
716  size = Tokenize(STR(), STR(","), &r);
717  EXPECT_EQ(0U, size);
718  ASSERT_EQ(0U, r.size());
719  r.clear();
720
721  size = Tokenize(STR(","), STR(","), &r);
722  EXPECT_EQ(0U, size);
723  ASSERT_EQ(0U, r.size());
724  r.clear();
725
726  size = Tokenize(STR(",;:."), STR(".:;,"), &r);
727  EXPECT_EQ(0U, size);
728  ASSERT_EQ(0U, r.size());
729  r.clear();
730
731  size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);
732  EXPECT_EQ(1U, size);
733  ASSERT_EQ(1U, r.size());
734  EXPECT_EQ(r[0], STR("a"));
735  r.clear();
736
737  size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);
738  EXPECT_EQ(2U, size);
739  ASSERT_EQ(2U, r.size());
740  EXPECT_EQ(r[0], STR("\ta\t"));
741  EXPECT_EQ(r[1], STR("b\tcc"));
742  r.clear();
743}
744
745TEST(StringUtilTest, TokenizeStdString) {
746  TokenizeTest<std::string>();
747}
748
749TEST(StringUtilTest, TokenizeStringPiece) {
750  TokenizeTest<base::StringPiece>();
751}
752
753// Test for JoinString
754TEST(StringUtilTest, JoinString) {
755  std::vector<std::string> in;
756  EXPECT_EQ("", JoinString(in, ','));
757
758  in.push_back("a");
759  EXPECT_EQ("a", JoinString(in, ','));
760
761  in.push_back("b");
762  in.push_back("c");
763  EXPECT_EQ("a,b,c", JoinString(in, ','));
764
765  in.push_back(std::string());
766  EXPECT_EQ("a,b,c,", JoinString(in, ','));
767  in.push_back(" ");
768  EXPECT_EQ("a|b|c|| ", JoinString(in, '|'));
769}
770
771// Test for JoinString overloaded with std::string separator
772TEST(StringUtilTest, JoinStringWithString) {
773  std::string separator(", ");
774  std::vector<std::string> parts;
775  EXPECT_EQ(std::string(), JoinString(parts, separator));
776
777  parts.push_back("a");
778  EXPECT_EQ("a", JoinString(parts, separator));
779
780  parts.push_back("b");
781  parts.push_back("c");
782  EXPECT_EQ("a, b, c", JoinString(parts, separator));
783
784  parts.push_back(std::string());
785  EXPECT_EQ("a, b, c, ", JoinString(parts, separator));
786  parts.push_back(" ");
787  EXPECT_EQ("a|b|c|| ", JoinString(parts, "|"));
788}
789
790// Test for JoinString overloaded with string16 separator
791TEST(StringUtilTest, JoinStringWithString16) {
792  string16 separator = ASCIIToUTF16(", ");
793  std::vector<string16> parts;
794  EXPECT_EQ(string16(), JoinString(parts, separator));
795
796  parts.push_back(ASCIIToUTF16("a"));
797  EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts, separator));
798
799  parts.push_back(ASCIIToUTF16("b"));
800  parts.push_back(ASCIIToUTF16("c"));
801  EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts, separator));
802
803  parts.push_back(ASCIIToUTF16(""));
804  EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts, separator));
805  parts.push_back(ASCIIToUTF16(" "));
806  EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts, ASCIIToUTF16("|")));
807}
808
809TEST(StringUtilTest, StartsWith) {
810  EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
811  EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
812  EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
813  EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
814  EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
815  EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
816  EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", false));
817  EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", true));
818  EXPECT_TRUE(StartsWithASCII("java", std::string(), false));
819  EXPECT_TRUE(StartsWithASCII("java", std::string(), true));
820
821  EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", true));
822  EXPECT_FALSE(StartsWith(L"JavaScript:url", L"javascript", true));
823  EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", false));
824  EXPECT_TRUE(StartsWith(L"JavaScript:url", L"javascript", false));
825  EXPECT_FALSE(StartsWith(L"java", L"javascript", true));
826  EXPECT_FALSE(StartsWith(L"java", L"javascript", false));
827  EXPECT_FALSE(StartsWith(std::wstring(), L"javascript", false));
828  EXPECT_FALSE(StartsWith(std::wstring(), L"javascript", true));
829  EXPECT_TRUE(StartsWith(L"java", std::wstring(), false));
830  EXPECT_TRUE(StartsWith(L"java", std::wstring(), true));
831}
832
833TEST(StringUtilTest, EndsWith) {
834  EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", true));
835  EXPECT_FALSE(EndsWith(L"Foo.Plugin", L".plugin", true));
836  EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", false));
837  EXPECT_TRUE(EndsWith(L"Foo.Plugin", L".plugin", false));
838  EXPECT_FALSE(EndsWith(L".plug", L".plugin", true));
839  EXPECT_FALSE(EndsWith(L".plug", L".plugin", false));
840  EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", true));
841  EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", false));
842  EXPECT_FALSE(EndsWith(std::wstring(), L".plugin", false));
843  EXPECT_FALSE(EndsWith(std::wstring(), L".plugin", true));
844  EXPECT_TRUE(EndsWith(L"Foo.plugin", std::wstring(), false));
845  EXPECT_TRUE(EndsWith(L"Foo.plugin", std::wstring(), true));
846  EXPECT_TRUE(EndsWith(L".plugin", L".plugin", false));
847  EXPECT_TRUE(EndsWith(L".plugin", L".plugin", true));
848  EXPECT_TRUE(EndsWith(std::wstring(), std::wstring(), false));
849  EXPECT_TRUE(EndsWith(std::wstring(), std::wstring(), true));
850}
851
852TEST(StringUtilTest, GetStringFWithOffsets) {
853  std::vector<string16> subst;
854  subst.push_back(ASCIIToUTF16("1"));
855  subst.push_back(ASCIIToUTF16("2"));
856  std::vector<size_t> offsets;
857
858  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
859                            subst,
860                            &offsets);
861  EXPECT_EQ(2U, offsets.size());
862  EXPECT_EQ(7U, offsets[0]);
863  EXPECT_EQ(25U, offsets[1]);
864  offsets.clear();
865
866  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
867                            subst,
868                            &offsets);
869  EXPECT_EQ(2U, offsets.size());
870  EXPECT_EQ(25U, offsets[0]);
871  EXPECT_EQ(7U, offsets[1]);
872  offsets.clear();
873}
874
875TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
876  // Test whether replacestringplaceholders works as expected when there
877  // are fewer inputs than outputs.
878  std::vector<string16> subst;
879  subst.push_back(ASCIIToUTF16("9a"));
880  subst.push_back(ASCIIToUTF16("8b"));
881  subst.push_back(ASCIIToUTF16("7c"));
882
883  string16 formatted =
884      ReplaceStringPlaceholders(
885          ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);
886
887  EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
888}
889
890TEST(StringUtilTest, ReplaceStringPlaceholders) {
891  std::vector<string16> subst;
892  subst.push_back(ASCIIToUTF16("9a"));
893  subst.push_back(ASCIIToUTF16("8b"));
894  subst.push_back(ASCIIToUTF16("7c"));
895  subst.push_back(ASCIIToUTF16("6d"));
896  subst.push_back(ASCIIToUTF16("5e"));
897  subst.push_back(ASCIIToUTF16("4f"));
898  subst.push_back(ASCIIToUTF16("3g"));
899  subst.push_back(ASCIIToUTF16("2h"));
900  subst.push_back(ASCIIToUTF16("1i"));
901
902  string16 formatted =
903      ReplaceStringPlaceholders(
904          ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);
905
906  EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
907}
908
909TEST(StringUtilTest, ReplaceStringPlaceholdersMoreThan9Replacements) {
910  std::vector<string16> subst;
911  subst.push_back(ASCIIToUTF16("9a"));
912  subst.push_back(ASCIIToUTF16("8b"));
913  subst.push_back(ASCIIToUTF16("7c"));
914  subst.push_back(ASCIIToUTF16("6d"));
915  subst.push_back(ASCIIToUTF16("5e"));
916  subst.push_back(ASCIIToUTF16("4f"));
917  subst.push_back(ASCIIToUTF16("3g"));
918  subst.push_back(ASCIIToUTF16("2h"));
919  subst.push_back(ASCIIToUTF16("1i"));
920  subst.push_back(ASCIIToUTF16("0j"));
921  subst.push_back(ASCIIToUTF16("-1k"));
922  subst.push_back(ASCIIToUTF16("-2l"));
923  subst.push_back(ASCIIToUTF16("-3m"));
924  subst.push_back(ASCIIToUTF16("-4n"));
925
926  string16 formatted =
927      ReplaceStringPlaceholders(
928          ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i,"
929                       "$10j,$11k,$12l,$13m,$14n,$1"), subst, NULL);
930
931  EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,"
932                                    "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a"));
933}
934
935TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
936  std::vector<std::string> subst;
937  subst.push_back("9a");
938  subst.push_back("8b");
939  subst.push_back("7c");
940  subst.push_back("6d");
941  subst.push_back("5e");
942  subst.push_back("4f");
943  subst.push_back("3g");
944  subst.push_back("2h");
945  subst.push_back("1i");
946
947  std::string formatted =
948      ReplaceStringPlaceholders(
949          "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);
950
951  EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
952}
953
954TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
955  std::vector<std::string> subst;
956  subst.push_back("a");
957  subst.push_back("b");
958  subst.push_back("c");
959  EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL),
960            "$1 $$2 $$$3");
961}
962
963TEST(StringUtilTest, MatchPatternTest) {
964  EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
965  EXPECT_TRUE(MatchPattern("www.google.com", "*"));
966  EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
967  EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
968  EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
969  EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
970  EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
971  EXPECT_FALSE(MatchPattern("", "*.*"));
972  EXPECT_TRUE(MatchPattern("", "*"));
973  EXPECT_TRUE(MatchPattern("", "?"));
974  EXPECT_TRUE(MatchPattern("", ""));
975  EXPECT_FALSE(MatchPattern("Hello", ""));
976  EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
977  // Stop after a certain recursion depth.
978  EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
979
980  // Test UTF8 matching.
981  EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
982  EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
983  EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
984  // Invalid sequences should be handled as a single invalid character.
985  EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
986  // If the pattern has invalid characters, it shouldn't match anything.
987  EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
988
989  // Test UTF16 character matching.
990  EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
991                           UTF8ToUTF16("*.com")));
992  EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
993                           UTF8ToUTF16("He??o\\*1*")));
994
995  // This test verifies that consecutive wild cards are collapsed into 1
996  // wildcard (when this doesn't occur, MatchPattern reaches it's maximum
997  // recursion depth).
998  EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
999                           UTF8ToUTF16("He********************************o")));
1000}
1001
1002TEST(StringUtilTest, LcpyTest) {
1003  // Test the normal case where we fit in our buffer.
1004  {
1005    char dst[10];
1006    wchar_t wdst[10];
1007    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1008    EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1009    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1010    EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1011  }
1012
1013  // Test dst_size == 0, nothing should be written to |dst| and we should
1014  // have the equivalent of strlen(src).
1015  {
1016    char dst[2] = {1, 2};
1017    wchar_t wdst[2] = {1, 2};
1018    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0));
1019    EXPECT_EQ(1, dst[0]);
1020    EXPECT_EQ(2, dst[1]);
1021    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0));
1022#if defined(WCHAR_T_IS_UNSIGNED)
1023    EXPECT_EQ(1U, wdst[0]);
1024    EXPECT_EQ(2U, wdst[1]);
1025#else
1026    EXPECT_EQ(1, wdst[0]);
1027    EXPECT_EQ(2, wdst[1]);
1028#endif
1029  }
1030
1031  // Test the case were we _just_ competely fit including the null.
1032  {
1033    char dst[8];
1034    wchar_t wdst[8];
1035    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1036    EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1037    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1038    EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1039  }
1040
1041  // Test the case were we we are one smaller, so we can't fit the null.
1042  {
1043    char dst[7];
1044    wchar_t wdst[7];
1045    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1046    EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
1047    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1048    EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
1049  }
1050
1051  // Test the case were we are just too small.
1052  {
1053    char dst[3];
1054    wchar_t wdst[3];
1055    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1056    EXPECT_EQ(0, memcmp(dst, "ab", 3));
1057    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1058    EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
1059  }
1060}
1061
1062TEST(StringUtilTest, WprintfFormatPortabilityTest) {
1063  static const struct {
1064    const wchar_t* input;
1065    bool portable;
1066  } cases[] = {
1067    { L"%ls", true },
1068    { L"%s", false },
1069    { L"%S", false },
1070    { L"%lS", false },
1071    { L"Hello, %s", false },
1072    { L"%lc", true },
1073    { L"%c", false },
1074    { L"%C", false },
1075    { L"%lC", false },
1076    { L"%ls %s", false },
1077    { L"%s %ls", false },
1078    { L"%s %ls %s", false },
1079    { L"%f", true },
1080    { L"%f %F", false },
1081    { L"%d %D", false },
1082    { L"%o %O", false },
1083    { L"%u %U", false },
1084    { L"%f %d %o %u", true },
1085    { L"%-8d (%02.1f%)", true },
1086    { L"% 10s", false },
1087    { L"% 10ls", true }
1088  };
1089  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
1090    EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input));
1091}
1092
1093TEST(StringUtilTest, RemoveChars) {
1094  const char* kRemoveChars = "-/+*";
1095  std::string input = "A-+bc/d!*";
1096  EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));
1097  EXPECT_EQ("Abcd!", input);
1098
1099  // No characters match kRemoveChars.
1100  EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1101  EXPECT_EQ("Abcd!", input);
1102
1103  // Empty string.
1104  input.clear();
1105  EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1106  EXPECT_EQ(std::string(), input);
1107}
1108
1109TEST(StringUtilTest, ReplaceChars) {
1110  struct TestData {
1111    const char* input;
1112    const char* replace_chars;
1113    const char* replace_with;
1114    const char* output;
1115    bool result;
1116  } cases[] = {
1117    { "", "", "", "", false },
1118    { "test", "", "", "test", false },
1119    { "test", "", "!", "test", false },
1120    { "test", "z", "!", "test", false },
1121    { "test", "e", "!", "t!st", true },
1122    { "test", "e", "!?", "t!?st", true },
1123    { "test", "ez", "!", "t!st", true },
1124    { "test", "zed", "!?", "t!?st", true },
1125    { "test", "t", "!?", "!?es!?", true },
1126    { "test", "et", "!>", "!>!>s!>", true },
1127    { "test", "zest", "!", "!!!!", true },
1128    { "test", "szt", "!", "!e!!", true },
1129    { "test", "t", "test", "testestest", true },
1130  };
1131
1132  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
1133    std::string output;
1134    bool result = ReplaceChars(cases[i].input,
1135                               cases[i].replace_chars,
1136                               cases[i].replace_with,
1137                               &output);
1138    EXPECT_EQ(cases[i].result, result);
1139    EXPECT_EQ(cases[i].output, output);
1140  }
1141}
1142
1143TEST(StringUtilTest, ContainsOnlyChars) {
1144  // Providing an empty list of characters should return false but for the empty
1145  // string.
1146  EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string()));
1147  EXPECT_FALSE(ContainsOnlyChars("Hello", std::string()));
1148
1149  EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234"));
1150  EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
1151  EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
1152  EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
1153  EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
1154}
1155
1156class WriteIntoTest : public testing::Test {
1157 protected:
1158  static void WritesCorrectly(size_t num_chars) {
1159    std::string buffer;
1160    char kOriginal[] = "supercali";
1161    strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
1162    // Using std::string(buffer.c_str()) instead of |buffer| truncates the
1163    // string at the first \0.
1164    EXPECT_EQ(std::string(kOriginal,
1165                          std::min(num_chars, arraysize(kOriginal) - 1)),
1166              std::string(buffer.c_str()));
1167    EXPECT_EQ(num_chars, buffer.size());
1168  }
1169};
1170
1171TEST_F(WriteIntoTest, WriteInto) {
1172  // Validate that WriteInto reserves enough space and
1173  // sizes a string correctly.
1174  WritesCorrectly(1);
1175  WritesCorrectly(2);
1176  WritesCorrectly(5000);
1177
1178  // Validate that WriteInto doesn't modify other strings
1179  // when using a Copy-on-Write implementation.
1180  const char kLive[] = "live";
1181  const char kDead[] = "dead";
1182  const std::string live = kLive;
1183  std::string dead = live;
1184  strncpy(WriteInto(&dead, 5), kDead, 4);
1185  EXPECT_EQ(kDead, dead);
1186  EXPECT_EQ(4u, dead.size());
1187  EXPECT_EQ(kLive, live);
1188  EXPECT_EQ(4u, live.size());
1189}
1190
1191}  // namespace base
1192