string_util_unittest.cc revision 3345a6884c488ff3a535c2c9acdd33d74b37e311
1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <math.h>
6#include <stdarg.h>
7
8#include <limits>
9#include <sstream>
10
11#include "base/basictypes.h"
12#include "base/string_util.h"
13#include "base/utf_string_conversions.h"
14#include "testing/gmock/include/gmock/gmock.h"
15#include "testing/gtest/include/gtest/gtest.h"
16
17using ::testing::ElementsAre;
18
19namespace base {
20
21namespace {
22
23// Given a null-terminated string of wchar_t with each wchar_t representing
24// a UTF-16 code unit, returns a string16 made up of wchar_t's in the input.
25// Each wchar_t should be <= 0xFFFF and a non-BMP character (> U+FFFF)
26// should be represented as a surrogate pair (two UTF-16 units)
27// *even* where wchar_t is 32-bit (Linux and Mac).
28//
29// This is to help write tests for functions with string16 params until
30// the C++ 0x UTF-16 literal is well-supported by compilers.
31string16 BuildString16(const wchar_t* s) {
32#if defined(WCHAR_T_IS_UTF16)
33  return string16(s);
34#elif defined(WCHAR_T_IS_UTF32)
35  string16 u16;
36  while (*s != 0) {
37    DCHECK_LE(static_cast<unsigned int>(*s), 0xFFFFu);
38    u16.push_back(*s++);
39  }
40  return u16;
41#endif
42}
43
44}  // namespace
45
46static const struct trim_case {
47  const wchar_t* input;
48  const TrimPositions positions;
49  const wchar_t* output;
50  const TrimPositions return_value;
51} trim_cases[] = {
52  {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
53  {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
54  {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
55  {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
56  {L"", TRIM_ALL, L"", TRIM_NONE},
57  {L"  ", TRIM_LEADING, L"", TRIM_LEADING},
58  {L"  ", TRIM_TRAILING, L"", TRIM_TRAILING},
59  {L"  ", TRIM_ALL, L"", TRIM_ALL},
60  {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
61  {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
62};
63
64static const struct trim_case_ascii {
65  const char* input;
66  const TrimPositions positions;
67  const char* output;
68  const TrimPositions return_value;
69} trim_cases_ascii[] = {
70  {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
71  {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
72  {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
73  {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
74  {"", TRIM_ALL, "", TRIM_NONE},
75  {"  ", TRIM_LEADING, "", TRIM_LEADING},
76  {"  ", TRIM_TRAILING, "", TRIM_TRAILING},
77  {"  ", TRIM_ALL, "", TRIM_ALL},
78  {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
79};
80
81namespace {
82
83// Helper used to test TruncateUTF8ToByteSize.
84bool Truncated(const std::string& input, const size_t byte_size,
85               std::string* output) {
86    size_t prev = input.length();
87    TruncateUTF8ToByteSize(input, byte_size, output);
88    return prev != output->length();
89}
90
91}  // namespace
92
93TEST(StringUtilTest, TruncateUTF8ToByteSize) {
94  std::string output;
95
96  // Empty strings and invalid byte_size arguments
97  EXPECT_FALSE(Truncated("", 0, &output));
98  EXPECT_EQ(output, "");
99  EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
100  EXPECT_EQ(output, "");
101  EXPECT_FALSE(Truncated("\xe1\x80\xbf", -1, &output));
102  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
103
104  // Testing the truncation of valid UTF8 correctly
105  EXPECT_TRUE(Truncated("abc", 2, &output));
106  EXPECT_EQ(output, "ab");
107  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
108  EXPECT_EQ(output.compare("\xc2\x81"), 0);
109  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
110  EXPECT_EQ(output.compare("\xc2\x81"), 0);
111  EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
112  EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
113
114  {
115    const char array[] = "\x00\x00\xc2\x81\xc2\x81";
116    const std::string array_string(array, arraysize(array));
117    EXPECT_TRUE(Truncated(array_string, 4, &output));
118    EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
119  }
120
121  {
122    const char array[] = "\x00\xc2\x81\xc2\x81";
123    const std::string array_string(array, arraysize(array));
124    EXPECT_TRUE(Truncated(array_string, 4, &output));
125    EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
126  }
127
128  // Testing invalid UTF8
129  EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
130  EXPECT_EQ(output.compare(""), 0);
131  EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
132  EXPECT_EQ(output.compare(""), 0);
133  EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
134  EXPECT_EQ(output.compare(""), 0);
135
136  // Testing invalid UTF8 mixed with valid UTF8
137  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
138  EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
139  EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
140  EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
141  EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
142              10, &output));
143  EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
144  EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
145              10, &output));
146  EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
147  EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
148  EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
149
150  // Overlong sequences
151  EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
152  EXPECT_EQ(output.compare(""), 0);
153  EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
154  EXPECT_EQ(output.compare(""), 0);
155  EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
156  EXPECT_EQ(output.compare(""), 0);
157  EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
158  EXPECT_EQ(output.compare(""), 0);
159  EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
160  EXPECT_EQ(output.compare(""), 0);
161  EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
162  EXPECT_EQ(output.compare(""), 0);
163  EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
164  EXPECT_EQ(output.compare(""), 0);
165  EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
166  EXPECT_EQ(output.compare(""), 0);
167  EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
168  EXPECT_EQ(output.compare(""), 0);
169  EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
170  EXPECT_EQ(output.compare(""), 0);
171  EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
172  EXPECT_EQ(output.compare(""), 0);
173
174  // Beyond U+10FFFF (the upper limit of Unicode codespace)
175  EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
176  EXPECT_EQ(output.compare(""), 0);
177  EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
178  EXPECT_EQ(output.compare(""), 0);
179  EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
180  EXPECT_EQ(output.compare(""), 0);
181
182  // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
183  EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
184  EXPECT_EQ(output.compare(""), 0);
185  EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
186  EXPECT_EQ(output.compare(""), 0);
187
188  {
189    const char array[] = "\x00\x00\xfe\xff";
190    const std::string array_string(array, arraysize(array));
191    EXPECT_TRUE(Truncated(array_string, 4, &output));
192    EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
193  }
194
195  // Variants on the previous test
196  {
197    const char array[] = "\xff\xfe\x00\x00";
198    const std::string array_string(array, 4);
199    EXPECT_FALSE(Truncated(array_string, 4, &output));
200    EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
201  }
202  {
203    const char array[] = "\xff\x00\x00\xfe";
204    const std::string array_string(array, arraysize(array));
205    EXPECT_TRUE(Truncated(array_string, 4, &output));
206    EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
207  }
208
209  // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
210  EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
211  EXPECT_EQ(output.compare(""), 0);
212  EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
213  EXPECT_EQ(output.compare(""), 0);
214  EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
215  EXPECT_EQ(output.compare(""), 0);
216  EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
217  EXPECT_EQ(output.compare(""), 0);
218  EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
219  EXPECT_EQ(output.compare(""), 0);
220
221  // Strings in legacy encodings that are valid in UTF-8, but
222  // are invalid as UTF-8 in real data.
223  EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
224  EXPECT_EQ(output.compare("caf"), 0);
225  EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
226  EXPECT_EQ(output.compare(""), 0);
227  EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
228  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
229  EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
230              &output));
231  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
232
233  // Testing using the same string as input and output.
234  EXPECT_FALSE(Truncated(output, 4, &output));
235  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
236  EXPECT_TRUE(Truncated(output, 3, &output));
237  EXPECT_EQ(output.compare("\xa7\x41"), 0);
238
239  // "abc" with U+201[CD] in windows-125[0-8]
240  EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
241  EXPECT_EQ(output.compare("\x93" "abc"), 0);
242
243  // U+0639 U+064E U+0644 U+064E in ISO-8859-6
244  EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
245  EXPECT_EQ(output.compare(""), 0);
246
247  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
248  EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
249  EXPECT_EQ(output.compare(""), 0);
250}
251
252TEST(StringUtilTest, TrimWhitespace) {
253  std::wstring output;  // Allow contents to carry over to next testcase
254  for (size_t i = 0; i < arraysize(trim_cases); ++i) {
255    const trim_case& value = trim_cases[i];
256    EXPECT_EQ(value.return_value,
257              TrimWhitespace(value.input, value.positions, &output));
258    EXPECT_EQ(value.output, output);
259  }
260
261  // Test that TrimWhitespace() can take the same string for input and output
262  output = L"  This is a test \r\n";
263  EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
264  EXPECT_EQ(L"This is a test", output);
265
266  // Once more, but with a string of whitespace
267  output = L"  \r\n";
268  EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
269  EXPECT_EQ(L"", output);
270
271  std::string output_ascii;
272  for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
273    const trim_case_ascii& value = trim_cases_ascii[i];
274    EXPECT_EQ(value.return_value,
275              TrimWhitespace(value.input, value.positions, &output_ascii));
276    EXPECT_EQ(value.output, output_ascii);
277  }
278}
279
// One wide-string CollapseWhitespace() scenario: collapsing |input| with the
// given |trim| flag is expected to produce |output|.
struct collapse_case {
  const wchar_t* input;
  const bool trim;
  const wchar_t* output;
};

static const collapse_case collapse_cases[] = {
  // trim == false
  {L" Google Video ", false, L"Google Video"},
  {L"Google Video", false, L"Google Video"},
  {L"", false, L""},
  {L"  ", false, L""},
  {L"\t\rTest String\n", false, L"Test String"},
  {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
  {L"    Test     \n  \t String    ", false, L"Test String"},
  {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
  {L"   Test String", false, L"Test String"},
  {L"Test String    ", false, L"Test String"},
  {L"Test String", false, L"Test String"},
  // trim == true
  {L"", true, L""},
  {L"\n", true, L""},
  {L"  \r  ", true, L""},
  {L"\nFoo", true, L"Foo"},
  {L"\r  Foo  ", true, L"Foo"},
  {L" Foo bar ", true, L"Foo bar"},
  {L"  \tFoo  bar  \n", true, L"Foo bar"},
  {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
305
306TEST(StringUtilTest, CollapseWhitespace) {
307  for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
308    const collapse_case& value = collapse_cases[i];
309    EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim));
310  }
311}
312
// One narrow-string CollapseWhitespaceASCII() scenario: collapsing |input|
// with the given |trim| flag is expected to produce |output|.
struct collapse_case_ascii {
  const char* input;
  const bool trim;
  const char* output;
};

static const collapse_case_ascii collapse_cases_ascii[] = {
  // trim == false
  {" Google Video ", false, "Google Video"},
  {"Google Video", false, "Google Video"},
  {"", false, ""},
  {"  ", false, ""},
  {"\t\rTest String\n", false, "Test String"},
  {"    Test     \n  \t String    ", false, "Test String"},
  {"   Test String", false, "Test String"},
  {"Test String    ", false, "Test String"},
  {"Test String", false, "Test String"},
  // trim == true
  {"", true, ""},
  {"\n", true, ""},
  {"  \r  ", true, ""},
  {"\nFoo", true, "Foo"},
  {"\r  Foo  ", true, "Foo"},
  {" Foo bar ", true, "Foo bar"},
  {"  \tFoo  bar  \n", true, "Foo bar"},
  {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
};
336
337TEST(StringUtilTest, CollapseWhitespaceASCII) {
338  for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
339    const collapse_case_ascii& value = collapse_cases_ascii[i];
340    EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
341  }
342}
343
344TEST(StringUtilTest, ContainsOnlyWhitespaceASCII) {
345  EXPECT_TRUE(ContainsOnlyWhitespaceASCII(""));
346  EXPECT_TRUE(ContainsOnlyWhitespaceASCII(" "));
347  EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t"));
348  EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t \r \n  "));
349  EXPECT_FALSE(ContainsOnlyWhitespaceASCII("a"));
350  EXPECT_FALSE(ContainsOnlyWhitespaceASCII("\thello\r \n  "));
351}
352
353TEST(StringUtilTest, ContainsOnlyWhitespace) {
354  EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("")));
355  EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(" ")));
356  EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t")));
357  EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t \r \n  ")));
358  EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("a")));
359  EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("\thello\r \n  ")));
360}
361
362TEST(StringUtilTest, IsStringUTF8) {
363  EXPECT_TRUE(IsStringUTF8("abc"));
364  EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
365  EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
366  EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
367  EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
368  EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM
369
370  // surrogate code points
371  EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
372  EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
373  EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
374
375  // overlong sequences
376  EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
377  EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "AB"
378  EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
379  EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
380  EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
381  EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
382  EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
383  EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
384  EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
385  EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
386  EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+00A5
387
388  // Beyond U+10FFFF (the upper limit of Unicode codespace)
389  EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
390  EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
391  EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes
392
393  // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
394  EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
395  EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
396  EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
397  EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
398
399  // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
400  EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE)
401  EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));  // U+1FFFE
402  EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+10FFFF
403  EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
404  EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF
405  // Strings in legacy encodings. We can certainly make up strings
406  // in a legacy encoding that are valid in UTF-8, but in real data,
407  // most of them are invalid as UTF-8.
408  EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
409  EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC001 in EUC-KR
410  EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
411  // "abc" with U+201[CD] in windows-125[0-8]
412  EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
413  // U+0639 U+064E U+0644 U+064E in ISO-8859-6
414  EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
415  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
416  EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
417
418  // Check that we support Embedded Nulls. The first uses the canonical UTF-8
419  // representation, and the second uses a 2-byte sequence. The second version
420  // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
421  // given codepoint must be used.
422  static const char kEmbeddedNull[] = "embedded\0null";
423  EXPECT_TRUE(IsStringUTF8(
424      std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
425  EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
426}
427
428TEST(StringUtilTest, ConvertASCII) {
429  static const char* char_cases[] = {
430    "Google Video",
431    "Hello, world\n",
432    "0123ABCDwxyz \a\b\t\r\n!+,.~"
433  };
434
435  static const wchar_t* const wchar_cases[] = {
436    L"Google Video",
437    L"Hello, world\n",
438    L"0123ABCDwxyz \a\b\t\r\n!+,.~"
439  };
440
441  for (size_t i = 0; i < arraysize(char_cases); ++i) {
442    EXPECT_TRUE(IsStringASCII(char_cases[i]));
443    std::wstring wide = ASCIIToWide(char_cases[i]);
444    EXPECT_EQ(wchar_cases[i], wide);
445
446    EXPECT_TRUE(IsStringASCII(wchar_cases[i]));
447    std::string ascii = WideToASCII(wchar_cases[i]);
448    EXPECT_EQ(char_cases[i], ascii);
449  }
450
451  EXPECT_FALSE(IsStringASCII("Google \x80Video"));
452  EXPECT_FALSE(IsStringASCII(L"Google \x80Video"));
453
454  // Convert empty strings.
455  std::wstring wempty;
456  std::string empty;
457  EXPECT_EQ(empty, WideToASCII(wempty));
458  EXPECT_EQ(wempty, ASCIIToWide(empty));
459
460  // Convert strings with an embedded NUL character.
461  const char chars_with_nul[] = "test\0string";
462  const int length_with_nul = arraysize(chars_with_nul) - 1;
463  std::string string_with_nul(chars_with_nul, length_with_nul);
464  std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
465  EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),
466            wide_with_nul.length());
467  std::string narrow_with_nul = WideToASCII(wide_with_nul);
468  EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
469            narrow_with_nul.length());
470  EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
471}
472
473TEST(StringUtilTest, ToUpperASCII) {
474  EXPECT_EQ('C', ToUpperASCII('C'));
475  EXPECT_EQ('C', ToUpperASCII('c'));
476  EXPECT_EQ('2', ToUpperASCII('2'));
477
478  EXPECT_EQ(L'C', ToUpperASCII(L'C'));
479  EXPECT_EQ(L'C', ToUpperASCII(L'c'));
480  EXPECT_EQ(L'2', ToUpperASCII(L'2'));
481
482  std::string in_place_a("Cc2");
483  StringToUpperASCII(&in_place_a);
484  EXPECT_EQ("CC2", in_place_a);
485
486  std::wstring in_place_w(L"Cc2");
487  StringToUpperASCII(&in_place_w);
488  EXPECT_EQ(L"CC2", in_place_w);
489
490  std::string original_a("Cc2");
491  std::string upper_a = StringToUpperASCII(original_a);
492  EXPECT_EQ("CC2", upper_a);
493
494  std::wstring original_w(L"Cc2");
495  std::wstring upper_w = StringToUpperASCII(original_w);
496  EXPECT_EQ(L"CC2", upper_w);
497}
498
// Inputs for the LowerCaseEqualsASCII test. Every row holds the same source
// text as a wide and as a narrow string, plus the lower-case ASCII string it
// is compared against.
static const struct {
  const wchar_t* src_w;  // Source text, wide.
  const char*    src_a;  // Source text, narrow.
  const char*    dst;    // Lower-case ASCII to compare against.
} lowercase_cases[] = {
  {L"FoO", "FoO", "foo"},  // Mixed case.
  {L"foo", "foo", "foo"},  // Already lower case.
  {L"FOO", "FOO", "foo"},  // All upper case.
};
508
509TEST(StringUtilTest, LowerCaseEqualsASCII) {
510  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) {
511    EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w,
512                                     lowercase_cases[i].dst));
513    EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
514                                     lowercase_cases[i].dst));
515  }
516}
517
518TEST(StringUtilTest, GetByteDisplayUnits) {
519  static const struct {
520    int64 bytes;
521    DataUnits expected;
522  } cases[] = {
523    {0, DATA_UNITS_BYTE},
524    {512, DATA_UNITS_BYTE},
525    {10*1024, DATA_UNITS_KIBIBYTE},
526    {10*1024*1024, DATA_UNITS_MEBIBYTE},
527    {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE},
528    {~(1LL<<63), DATA_UNITS_GIBIBYTE},
529#ifdef NDEBUG
530    {-1, DATA_UNITS_BYTE},
531#endif
532  };
533
534  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
535    EXPECT_EQ(cases[i].expected, GetByteDisplayUnits(cases[i].bytes));
536}
537
538TEST(StringUtilTest, FormatBytes) {
539  static const struct {
540    int64 bytes;
541    DataUnits units;
542    const char* expected;
543    const char* expected_with_units;
544  } cases[] = {
545    // Expected behavior: we show one post-decimal digit when we have
546    // under two pre-decimal digits, except in cases where it makes no
547    // sense (zero or bytes).
548    // Since we switch units once we cross the 1000 mark, this keeps
549    // the display of file sizes or bytes consistently around three
550    // digits.
551    {0, DATA_UNITS_BYTE, "0", "0 B"},
552    {512, DATA_UNITS_BYTE, "512", "512 B"},
553    {512, DATA_UNITS_KIBIBYTE, "0.5", "0.5 kB"},
554    {1024*1024, DATA_UNITS_KIBIBYTE, "1024", "1024 kB"},
555    {1024*1024, DATA_UNITS_MEBIBYTE, "1.0", "1.0 MB"},
556    {1024*1024*1024, DATA_UNITS_GIBIBYTE, "1.0", "1.0 GB"},
557    {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "10.0", "10.0 GB"},
558    {99LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "99.0", "99.0 GB"},
559    {105LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "105", "105 GB"},
560    {105LL*1024*1024*1024 + 500LL*1024*1024, DATA_UNITS_GIBIBYTE,
561     "105", "105 GB"},
562    {~(1LL<<63), DATA_UNITS_GIBIBYTE, "8589934592", "8589934592 GB"},
563
564    {99*1024 + 103, DATA_UNITS_KIBIBYTE, "99.1", "99.1 kB"},
565    {1024*1024 + 103, DATA_UNITS_KIBIBYTE, "1024", "1024 kB"},
566    {1024*1024 + 205 * 1024, DATA_UNITS_MEBIBYTE, "1.2", "1.2 MB"},
567    {1024*1024*1024 + (927 * 1024*1024), DATA_UNITS_GIBIBYTE,
568     "1.9", "1.9 GB"},
569    {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "10.0", "10.0 GB"},
570    {100LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "100", "100 GB"},
571#ifdef NDEBUG
572    {-1, DATA_UNITS_BYTE, "", ""},
573#endif
574  };
575
576  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
577    EXPECT_EQ(ASCIIToUTF16(cases[i].expected),
578              FormatBytes(cases[i].bytes, cases[i].units, false));
579    EXPECT_EQ(ASCIIToUTF16(cases[i].expected_with_units),
580              FormatBytes(cases[i].bytes, cases[i].units, true));
581  }
582}
583
584TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
585  static const struct {
586    const char* str;
587    string16::size_type start_offset;
588    const char* find_this;
589    const char* replace_with;
590    const char* expected;
591  } cases[] = {
592    {"aaa", 0, "a", "b", "bbb"},
593    {"abb", 0, "ab", "a", "ab"},
594    {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
595    {"Not found", 0, "x", "0", "Not found"},
596    {"Not found again", 5, "x", "0", "Not found again"},
597    {" Making it much longer ", 0, " ", "Four score and seven years ago",
598     "Four score and seven years agoMakingFour score and seven years agoit"
599     "Four score and seven years agomuchFour score and seven years agolonger"
600     "Four score and seven years ago"},
601    {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
602    {"Replace me only me once", 9, "me ", "", "Replace me only once"},
603    {"abababab", 2, "ab", "c", "abccc"},
604  };
605
606  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
607    string16 str = ASCIIToUTF16(cases[i].str);
608    ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
609                                 ASCIIToUTF16(cases[i].find_this),
610                                 ASCIIToUTF16(cases[i].replace_with));
611    EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
612  }
613}
614
615TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
616  static const struct {
617    const char* str;
618    string16::size_type start_offset;
619    const char* find_this;
620    const char* replace_with;
621    const char* expected;
622  } cases[] = {
623    {"aaa", 0, "a", "b", "baa"},
624    {"abb", 0, "ab", "a", "ab"},
625    {"Removing some substrings inging", 0, "ing", "",
626      "Remov some substrings inging"},
627    {"Not found", 0, "x", "0", "Not found"},
628    {"Not found again", 5, "x", "0", "Not found again"},
629    {" Making it much longer ", 0, " ", "Four score and seven years ago",
630     "Four score and seven years agoMaking it much longer "},
631    {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
632    {"Replace me only me once", 4, "me ", "", "Replace only me once"},
633    {"abababab", 2, "ab", "c", "abcabab"},
634  };
635
636  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
637    string16 str = ASCIIToUTF16(cases[i].str);
638    ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
639                                     ASCIIToUTF16(cases[i].find_this),
640                                     ASCIIToUTF16(cases[i].replace_with));
641    EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
642  }
643}
644
645TEST(StringUtilTest, HexDigitToInt) {
646  EXPECT_EQ(0, HexDigitToInt('0'));
647  EXPECT_EQ(1, HexDigitToInt('1'));
648  EXPECT_EQ(2, HexDigitToInt('2'));
649  EXPECT_EQ(3, HexDigitToInt('3'));
650  EXPECT_EQ(4, HexDigitToInt('4'));
651  EXPECT_EQ(5, HexDigitToInt('5'));
652  EXPECT_EQ(6, HexDigitToInt('6'));
653  EXPECT_EQ(7, HexDigitToInt('7'));
654  EXPECT_EQ(8, HexDigitToInt('8'));
655  EXPECT_EQ(9, HexDigitToInt('9'));
656  EXPECT_EQ(10, HexDigitToInt('A'));
657  EXPECT_EQ(11, HexDigitToInt('B'));
658  EXPECT_EQ(12, HexDigitToInt('C'));
659  EXPECT_EQ(13, HexDigitToInt('D'));
660  EXPECT_EQ(14, HexDigitToInt('E'));
661  EXPECT_EQ(15, HexDigitToInt('F'));
662
663  // Verify the lower case as well.
664  EXPECT_EQ(10, HexDigitToInt('a'));
665  EXPECT_EQ(11, HexDigitToInt('b'));
666  EXPECT_EQ(12, HexDigitToInt('c'));
667  EXPECT_EQ(13, HexDigitToInt('d'));
668  EXPECT_EQ(14, HexDigitToInt('e'));
669  EXPECT_EQ(15, HexDigitToInt('f'));
670}
671
672// This checks where we can use the assignment operator for a va_list. We need
673// a way to do this since Visual C doesn't support va_copy, but assignment on
674// va_list is not guaranteed to be a copy. See StringAppendVT which uses this
675// capability.
676static void VariableArgsFunc(const char* format, ...) {
677  va_list org;
678  va_start(org, format);
679
680  va_list dup;
681  GG_VA_COPY(dup, org);
682  int i1 = va_arg(org, int);
683  int j1 = va_arg(org, int);
684  char* s1 = va_arg(org, char*);
685  double d1 = va_arg(org, double);
686  va_end(org);
687
688  int i2 = va_arg(dup, int);
689  int j2 = va_arg(dup, int);
690  char* s2 = va_arg(dup, char*);
691  double d2 = va_arg(dup, double);
692
693  EXPECT_EQ(i1, i2);
694  EXPECT_EQ(j1, j2);
695  EXPECT_STREQ(s1, s2);
696  EXPECT_EQ(d1, d2);
697
698  va_end(dup);
699}
700
701TEST(StringUtilTest, VAList) {
702  VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
703}
704
705// Test for Tokenize
706template <typename STR>
707void TokenizeTest() {
708  std::vector<STR> r;
709  size_t size;
710
711  size = Tokenize(STR("This is a string"), STR(" "), &r);
712  EXPECT_EQ(4U, size);
713  ASSERT_EQ(4U, r.size());
714  EXPECT_EQ(r[0], STR("This"));
715  EXPECT_EQ(r[1], STR("is"));
716  EXPECT_EQ(r[2], STR("a"));
717  EXPECT_EQ(r[3], STR("string"));
718  r.clear();
719
720  size = Tokenize(STR("one,two,three"), STR(","), &r);
721  EXPECT_EQ(3U, size);
722  ASSERT_EQ(3U, r.size());
723  EXPECT_EQ(r[0], STR("one"));
724  EXPECT_EQ(r[1], STR("two"));
725  EXPECT_EQ(r[2], STR("three"));
726  r.clear();
727
728  size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);
729  EXPECT_EQ(3U, size);
730  ASSERT_EQ(3U, r.size());
731  EXPECT_EQ(r[0], STR("one"));
732  EXPECT_EQ(r[1], STR("two"));
733  EXPECT_EQ(r[2], STR("three;four"));
734  r.clear();
735
736  size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);
737  EXPECT_EQ(4U, size);
738  ASSERT_EQ(4U, r.size());
739  EXPECT_EQ(r[0], STR("one"));
740  EXPECT_EQ(r[1], STR("two"));
741  EXPECT_EQ(r[2], STR("three"));
742  EXPECT_EQ(r[3], STR("four"));
743  r.clear();
744
745  size = Tokenize(STR("one, two, three"), STR(","), &r);
746  EXPECT_EQ(3U, size);
747  ASSERT_EQ(3U, r.size());
748  EXPECT_EQ(r[0], STR("one"));
749  EXPECT_EQ(r[1], STR(" two"));
750  EXPECT_EQ(r[2], STR(" three"));
751  r.clear();
752
753  size = Tokenize(STR("one, two, three, "), STR(","), &r);
754  EXPECT_EQ(4U, size);
755  ASSERT_EQ(4U, r.size());
756  EXPECT_EQ(r[0], STR("one"));
757  EXPECT_EQ(r[1], STR(" two"));
758  EXPECT_EQ(r[2], STR(" three"));
759  EXPECT_EQ(r[3], STR(" "));
760  r.clear();
761
762  size = Tokenize(STR("one, two, three,"), STR(","), &r);
763  EXPECT_EQ(3U, size);
764  ASSERT_EQ(3U, r.size());
765  EXPECT_EQ(r[0], STR("one"));
766  EXPECT_EQ(r[1], STR(" two"));
767  EXPECT_EQ(r[2], STR(" three"));
768  r.clear();
769
770  size = Tokenize(STR(""), STR(","), &r);
771  EXPECT_EQ(0U, size);
772  ASSERT_EQ(0U, r.size());
773  r.clear();
774
775  size = Tokenize(STR(","), STR(","), &r);
776  EXPECT_EQ(0U, size);
777  ASSERT_EQ(0U, r.size());
778  r.clear();
779
780  size = Tokenize(STR(",;:."), STR(".:;,"), &r);
781  EXPECT_EQ(0U, size);
782  ASSERT_EQ(0U, r.size());
783  r.clear();
784
785  size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);
786  EXPECT_EQ(1U, size);
787  ASSERT_EQ(1U, r.size());
788  EXPECT_EQ(r[0], STR("a"));
789  r.clear();
790
791  size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);
792  EXPECT_EQ(2U, size);
793  ASSERT_EQ(2U, r.size());
794  EXPECT_EQ(r[0], STR("\ta\t"));
795  EXPECT_EQ(r[1], STR("b\tcc"));
796  r.clear();
797}
798
799TEST(StringUtilTest, TokenizeStdString) {
800  TokenizeTest<std::string>();
801}
802
803TEST(StringUtilTest, TokenizeStringPiece) {
804  TokenizeTest<base::StringPiece>();
805}
806
807// Test for JoinString
808TEST(StringUtilTest, JoinString) {
809  std::vector<std::string> in;
810  EXPECT_EQ("", JoinString(in, ','));
811
812  in.push_back("a");
813  EXPECT_EQ("a", JoinString(in, ','));
814
815  in.push_back("b");
816  in.push_back("c");
817  EXPECT_EQ("a,b,c", JoinString(in, ','));
818
819  in.push_back("");
820  EXPECT_EQ("a,b,c,", JoinString(in, ','));
821  in.push_back(" ");
822  EXPECT_EQ("a|b|c|| ", JoinString(in, '|'));
823}
824
825TEST(StringUtilTest, StartsWith) {
826  EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
827  EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
828  EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
829  EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
830  EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
831  EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
832  EXPECT_FALSE(StartsWithASCII("", "javascript", false));
833  EXPECT_FALSE(StartsWithASCII("", "javascript", true));
834  EXPECT_TRUE(StartsWithASCII("java", "", false));
835  EXPECT_TRUE(StartsWithASCII("java", "", true));
836
837  EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", true));
838  EXPECT_FALSE(StartsWith(L"JavaScript:url", L"javascript", true));
839  EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", false));
840  EXPECT_TRUE(StartsWith(L"JavaScript:url", L"javascript", false));
841  EXPECT_FALSE(StartsWith(L"java", L"javascript", true));
842  EXPECT_FALSE(StartsWith(L"java", L"javascript", false));
843  EXPECT_FALSE(StartsWith(L"", L"javascript", false));
844  EXPECT_FALSE(StartsWith(L"", L"javascript", true));
845  EXPECT_TRUE(StartsWith(L"java", L"", false));
846  EXPECT_TRUE(StartsWith(L"java", L"", true));
847}
848
849TEST(StringUtilTest, EndsWith) {
850  EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", true));
851  EXPECT_FALSE(EndsWith(L"Foo.Plugin", L".plugin", true));
852  EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", false));
853  EXPECT_TRUE(EndsWith(L"Foo.Plugin", L".plugin", false));
854  EXPECT_FALSE(EndsWith(L".plug", L".plugin", true));
855  EXPECT_FALSE(EndsWith(L".plug", L".plugin", false));
856  EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", true));
857  EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", false));
858  EXPECT_FALSE(EndsWith(L"", L".plugin", false));
859  EXPECT_FALSE(EndsWith(L"", L".plugin", true));
860  EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", false));
861  EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", true));
862  EXPECT_TRUE(EndsWith(L".plugin", L".plugin", false));
863  EXPECT_TRUE(EndsWith(L".plugin", L".plugin", true));
864  EXPECT_TRUE(EndsWith(L"", L"", false));
865  EXPECT_TRUE(EndsWith(L"", L"", true));
866}
867
868TEST(StringUtilTest, GetStringFWithOffsets) {
869  std::vector<string16> subst;
870  subst.push_back(ASCIIToUTF16("1"));
871  subst.push_back(ASCIIToUTF16("2"));
872  std::vector<size_t> offsets;
873
874  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
875                            subst,
876                            &offsets);
877  EXPECT_EQ(2U, offsets.size());
878  EXPECT_EQ(7U, offsets[0]);
879  EXPECT_EQ(25U, offsets[1]);
880  offsets.clear();
881
882  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
883                            subst,
884                            &offsets);
885  EXPECT_EQ(2U, offsets.size());
886  EXPECT_EQ(25U, offsets[0]);
887  EXPECT_EQ(7U, offsets[1]);
888  offsets.clear();
889}
890
891TEST(StringUtilTest, ReplaceStringPlaceholders) {
892  std::vector<string16> subst;
893  subst.push_back(ASCIIToUTF16("9a"));
894  subst.push_back(ASCIIToUTF16("8b"));
895  subst.push_back(ASCIIToUTF16("7c"));
896  subst.push_back(ASCIIToUTF16("6d"));
897  subst.push_back(ASCIIToUTF16("5e"));
898  subst.push_back(ASCIIToUTF16("4f"));
899  subst.push_back(ASCIIToUTF16("3g"));
900  subst.push_back(ASCIIToUTF16("2h"));
901  subst.push_back(ASCIIToUTF16("1i"));
902
903  string16 formatted =
904      ReplaceStringPlaceholders(
905          ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);
906
907  EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
908}
909
910TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
911  // Test whether replacestringplaceholders works as expected when there
912  // are fewer inputs than outputs.
913  std::vector<string16> subst;
914  subst.push_back(ASCIIToUTF16("9a"));
915  subst.push_back(ASCIIToUTF16("8b"));
916  subst.push_back(ASCIIToUTF16("7c"));
917
918  string16 formatted =
919      ReplaceStringPlaceholders(
920          ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);
921
922  EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
923}
924
925TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
926  std::vector<std::string> subst;
927  subst.push_back("9a");
928  subst.push_back("8b");
929  subst.push_back("7c");
930  subst.push_back("6d");
931  subst.push_back("5e");
932  subst.push_back("4f");
933  subst.push_back("3g");
934  subst.push_back("2h");
935  subst.push_back("1i");
936
937  std::string formatted =
938      ReplaceStringPlaceholders(
939          "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);
940
941  EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
942}
943
944TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
945  std::vector<std::string> subst;
946  subst.push_back("a");
947  subst.push_back("b");
948  subst.push_back("c");
949  EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL),
950            "$1 $$2 $$$3");
951}
952
953TEST(StringUtilTest, SplitStringAlongWhitespace) {
954  struct TestData {
955    const std::wstring input;
956    const size_t expected_result_count;
957    const std::wstring output1;
958    const std::wstring output2;
959  } data[] = {
960    { L"a",       1, L"a",  L""   },
961    { L" ",       0, L"",   L""   },
962    { L" a",      1, L"a",  L""   },
963    { L" ab ",    1, L"ab", L""   },
964    { L" ab c",   2, L"ab", L"c"  },
965    { L" ab c ",  2, L"ab", L"c"  },
966    { L" ab cd",  2, L"ab", L"cd" },
967    { L" ab cd ", 2, L"ab", L"cd" },
968    { L" \ta\t",  1, L"a",  L""   },
969    { L" b\ta\t", 2, L"b",  L"a"  },
970    { L" b\tat",  2, L"b",  L"at" },
971    { L"b\tat",   2, L"b",  L"at" },
972    { L"b\t at",  2, L"b",  L"at" },
973  };
974  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) {
975    std::vector<std::wstring> results;
976    SplitStringAlongWhitespace(data[i].input, &results);
977    ASSERT_EQ(data[i].expected_result_count, results.size());
978    if (data[i].expected_result_count > 0)
979      ASSERT_EQ(data[i].output1, results[0]);
980    if (data[i].expected_result_count > 1)
981      ASSERT_EQ(data[i].output2, results[1]);
982  }
983}
984
985TEST(StringUtilTest, MatchPatternTest) {
986  EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
987  EXPECT_TRUE(MatchPattern("www.google.com", "*"));
988  EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
989  EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
990  EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
991  EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
992  EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
993  EXPECT_FALSE(MatchPattern("", "*.*"));
994  EXPECT_TRUE(MatchPattern("", "*"));
995  EXPECT_TRUE(MatchPattern("", "?"));
996  EXPECT_TRUE(MatchPattern("", ""));
997  EXPECT_FALSE(MatchPattern("Hello", ""));
998  EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
999  // Stop after a certain recursion depth.
1000  EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
1001
1002  // Test UTF8 matching.
1003  EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
1004  EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
1005  EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
1006  // Invalid sequences should be handled as a single invalid character.
1007  EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
1008  // If the pattern has invalid characters, it shouldn't match anything.
1009  EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
1010
1011  // Test UTF16 character matching.
1012  EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
1013                           UTF8ToUTF16("*.com")));
1014  EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
1015                           UTF8ToUTF16("He??o\\*1*")));
1016}
1017
1018TEST(StringUtilTest, LcpyTest) {
1019  // Test the normal case where we fit in our buffer.
1020  {
1021    char dst[10];
1022    wchar_t wdst[10];
1023    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1024    EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1025    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1026    EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1027  }
1028
1029  // Test dst_size == 0, nothing should be written to |dst| and we should
1030  // have the equivalent of strlen(src).
1031  {
1032    char dst[2] = {1, 2};
1033    wchar_t wdst[2] = {1, 2};
1034    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0));
1035    EXPECT_EQ(1, dst[0]);
1036    EXPECT_EQ(2, dst[1]);
1037    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0));
1038#if defined(WCHAR_T_IS_UNSIGNED)
1039    EXPECT_EQ(1U, wdst[0]);
1040    EXPECT_EQ(2U, wdst[1]);
1041#else
1042    EXPECT_EQ(1, wdst[0]);
1043    EXPECT_EQ(2, wdst[1]);
1044#endif
1045  }
1046
1047  // Test the case were we _just_ competely fit including the null.
1048  {
1049    char dst[8];
1050    wchar_t wdst[8];
1051    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1052    EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1053    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1054    EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1055  }
1056
1057  // Test the case were we we are one smaller, so we can't fit the null.
1058  {
1059    char dst[7];
1060    wchar_t wdst[7];
1061    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1062    EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
1063    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1064    EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
1065  }
1066
1067  // Test the case were we are just too small.
1068  {
1069    char dst[3];
1070    wchar_t wdst[3];
1071    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1072    EXPECT_EQ(0, memcmp(dst, "ab", 3));
1073    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1074    EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
1075  }
1076}
1077
1078TEST(StringUtilTest, WprintfFormatPortabilityTest) {
1079  struct TestData {
1080    const wchar_t* input;
1081    bool portable;
1082  } cases[] = {
1083    { L"%ls", true },
1084    { L"%s", false },
1085    { L"%S", false },
1086    { L"%lS", false },
1087    { L"Hello, %s", false },
1088    { L"%lc", true },
1089    { L"%c", false },
1090    { L"%C", false },
1091    { L"%lC", false },
1092    { L"%ls %s", false },
1093    { L"%s %ls", false },
1094    { L"%s %ls %s", false },
1095    { L"%f", true },
1096    { L"%f %F", false },
1097    { L"%d %D", false },
1098    { L"%o %O", false },
1099    { L"%u %U", false },
1100    { L"%f %d %o %u", true },
1101    { L"%-8d (%02.1f%)", true },
1102    { L"% 10s", false },
1103    { L"% 10ls", true }
1104  };
1105  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
1106    EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input));
1107  }
1108}
1109
1110TEST(StringUtilTest, ElideString) {
1111  struct TestData {
1112    const wchar_t* input;
1113    int max_len;
1114    bool result;
1115    const wchar_t* output;
1116  } cases[] = {
1117    { L"Hello", 0, true, L"" },
1118    { L"", 0, false, L"" },
1119    { L"Hello, my name is Tom", 1, true, L"H" },
1120    { L"Hello, my name is Tom", 2, true, L"He" },
1121    { L"Hello, my name is Tom", 3, true, L"H.m" },
1122    { L"Hello, my name is Tom", 4, true, L"H..m" },
1123    { L"Hello, my name is Tom", 5, true, L"H...m" },
1124    { L"Hello, my name is Tom", 6, true, L"He...m" },
1125    { L"Hello, my name is Tom", 7, true, L"He...om" },
1126    { L"Hello, my name is Tom", 10, true, L"Hell...Tom" },
1127    { L"Hello, my name is Tom", 100, false, L"Hello, my name is Tom" }
1128  };
1129  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
1130    std::wstring output;
1131    EXPECT_EQ(cases[i].result,
1132              ElideString(cases[i].input, cases[i].max_len, &output));
1133    EXPECT_TRUE(output == cases[i].output);
1134  }
1135}
1136
1137TEST(StringUtilTest, RemoveChars) {
1138  const char* kRemoveChars = "-/+*";
1139  std::string input = "A-+bc/d!*";
1140  EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));
1141  EXPECT_EQ("Abcd!", input);
1142
1143  // No characters match kRemoveChars.
1144  EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1145  EXPECT_EQ("Abcd!", input);
1146
1147  // Empty string.
1148  input.clear();
1149  EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1150  EXPECT_EQ(std::string(), input);
1151}
1152
1153TEST(StringUtilTest, ContainsOnlyChars) {
1154  // Providing an empty list of characters should return false but for the empty
1155  // string.
1156  EXPECT_TRUE(ContainsOnlyChars("", ""));
1157  EXPECT_FALSE(ContainsOnlyChars("Hello", ""));
1158
1159  EXPECT_TRUE(ContainsOnlyChars("", "1234"));
1160  EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
1161  EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
1162  EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
1163  EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
1164}
1165
1166}  // namespace base
1167