string_util_unittest.cc revision c407dc5cd9bdc5668497f21b26b09d988ab439de
1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <math.h>
6#include <stdarg.h>
7
8#include <limits>
9#include <sstream>
10
11#include "base/basictypes.h"
12#include "base/string_util.h"
13#include "base/utf_string_conversions.h"
14#include "testing/gmock/include/gmock/gmock.h"
15#include "testing/gtest/include/gtest/gtest.h"
16
17using ::testing::ElementsAre;
18
19namespace base {
20
21namespace {
22
23// Given a null-terminated string of wchar_t with each wchar_t representing
24// a UTF-16 code unit, returns a string16 made up of wchar_t's in the input.
25// Each wchar_t should be <= 0xFFFF and a non-BMP character (> U+FFFF)
26// should be represented as a surrogate pair (two UTF-16 units)
27// *even* where wchar_t is 32-bit (Linux and Mac).
28//
29// This is to help write tests for functions with string16 params until
30// the C++ 0x UTF-16 literal is well-supported by compilers.
31string16 BuildString16(const wchar_t* s) {
32#if defined(WCHAR_T_IS_UTF16)
33  return string16(s);
34#elif defined(WCHAR_T_IS_UTF32)
35  string16 u16;
36  while (*s != 0) {
37    DCHECK_LE(static_cast<unsigned int>(*s), 0xFFFFu);
38    u16.push_back(*s++);
39  }
40  return u16;
41#endif
42}
43
44}  // namespace
45
// Wide-string cases consumed by the TrimWhitespace test.
static const struct trim_case {
  // String passed to TrimWhitespace().
  const wchar_t* input;
  // Which end(s) of |input| the call is asked to trim.
  const TrimPositions positions;
  // Expected contents of the output string after the call.
  const wchar_t* output;
  // Expected return value of TrimWhitespace().
  const TrimPositions return_value;
} trim_cases[] = {
  {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
  {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
  {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
  {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
  {L"", TRIM_ALL, L"", TRIM_NONE},
  {L"  ", TRIM_LEADING, L"", TRIM_LEADING},
  {L"  ", TRIM_TRAILING, L"", TRIM_TRAILING},
  {L"  ", TRIM_ALL, L"", TRIM_ALL},
  {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
  // Non-ASCII whitespace: U+2002, U+00A0 and U+3000.
  {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
};
63
// Narrow-string (char) cases consumed by the TrimWhitespace test.
static const struct trim_case_ascii {
  // String passed to TrimWhitespace().
  const char* input;
  // Which end(s) of |input| the call is asked to trim.
  const TrimPositions positions;
  // Expected contents of the output string after the call.
  const char* output;
  // Expected return value of TrimWhitespace().
  const TrimPositions return_value;
} trim_cases_ascii[] = {
  {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
  {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
  {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
  {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
  {"", TRIM_ALL, "", TRIM_NONE},
  {"  ", TRIM_LEADING, "", TRIM_LEADING},
  {"  ", TRIM_TRAILING, "", TRIM_TRAILING},
  {"  ", TRIM_ALL, "", TRIM_ALL},
  {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
};
80
81namespace {
82
83// Helper used to test TruncateUTF8ToByteSize.
84bool Truncated(const std::string& input, const size_t byte_size,
85               std::string* output) {
86    size_t prev = input.length();
87    TruncateUTF8ToByteSize(input, byte_size, output);
88    return prev != output->length();
89}
90
91}  // namespace
92
// Exercises TruncateUTF8ToByteSize() through the Truncated() helper, which
// returns true when the output length differs from the input length.
// |output| is reused by every check, and the "same string as input and
// output" section relies on the value left in it by the preceding check, so
// the statements below are order-dependent.
TEST(StringUtilTest, TruncateUTF8ToByteSize) {
  std::string output;

  // Empty strings and invalid byte_size arguments
  EXPECT_FALSE(Truncated("", 0, &output));
  EXPECT_EQ(output, "");
  EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
  EXPECT_EQ(output, "");
  // -1 converts to the largest size_t value when bound to |byte_size|.
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", -1, &output));
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));

  // Testing the truncation of valid UTF8 correctly
  EXPECT_TRUE(Truncated("abc", 2, &output));
  EXPECT_EQ(output, "ab");
  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
  EXPECT_EQ(output.compare("\xc2\x81"), 0);
  // A limit that falls in the middle of a two-byte sequence drops that whole
  // sequence.
  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
  EXPECT_EQ(output.compare("\xc2\x81"), 0);
  EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
  EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);

  // Inputs with embedded NUL bytes. The std::string is built with an explicit
  // length so the \x00 bytes are kept; arraysize(array) also counts the
  // literal's terminating NUL.
  {
    const char array[] = "\x00\x00\xc2\x81\xc2\x81";
    const std::string array_string(array, arraysize(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
  }

  {
    const char array[] = "\x00\xc2\x81\xc2\x81";
    const std::string array_string(array, arraysize(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
  }

  // Testing invalid UTF8 (encoded surrogate code points)
  EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
  EXPECT_EQ(output.compare(""), 0);

  // Testing invalid UTF8 mixed with valid UTF8
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
  EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
  EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
  EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
  EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
              10, &output));
  EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
  EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
              10, &output));
  EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
  // Leading UTF-8 BOM followed by ASCII.
  EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
  EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);

  // Overlong sequences
  EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
  EXPECT_EQ(output.compare(""), 0);

  // Beyond U+10FFFF (the upper limit of Unicode codespace)
  EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
  EXPECT_EQ(output.compare(""), 0);

  // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
  EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
  EXPECT_EQ(output.compare(""), 0);

  {
    const char array[] = "\x00\x00\xfe\xff";
    const std::string array_string(array, arraysize(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
  }

  // Variants on the previous test
  {
    // Built with an explicit length of 4, so unlike the neighbouring cases
    // the literal's terminating NUL is not part of the input.
    const char array[] = "\xff\xfe\x00\x00";
    const std::string array_string(array, 4);
    EXPECT_FALSE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
  }
  {
    const char array[] = "\xff\x00\x00\xfe";
    const std::string array_string(array, arraysize(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
  }

  // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
  EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
  EXPECT_EQ(output.compare(""), 0);

  // Strings in legacy encodings. One can make up legacy-encoded strings that
  // happen to be valid UTF-8, but in real data most of them are invalid as
  // UTF-8.
  EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
  EXPECT_EQ(output.compare("caf"), 0);
  EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
  EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
              &output));
  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);

  // Testing using the same string as input and output. |output| still holds
  // "\xa7\x41\xa6\x6e" from the check just above.
  EXPECT_FALSE(Truncated(output, 4, &output));
  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
  EXPECT_TRUE(Truncated(output, 3, &output));
  EXPECT_EQ(output.compare("\xa7\x41"), 0);

  // "abc" with U+201[CD] in windows-125[0-8]
  EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
  EXPECT_EQ(output.compare("\x93" "abc"), 0);

  // U+0639 U+064E U+0644 U+064E in ISO-8859-6
  EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
  EXPECT_EQ(output.compare(""), 0);

  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
  EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
}
251
252TEST(StringUtilTest, TrimWhitespace) {
253  std::wstring output;  // Allow contents to carry over to next testcase
254  for (size_t i = 0; i < arraysize(trim_cases); ++i) {
255    const trim_case& value = trim_cases[i];
256    EXPECT_EQ(value.return_value,
257              TrimWhitespace(value.input, value.positions, &output));
258    EXPECT_EQ(value.output, output);
259  }
260
261  // Test that TrimWhitespace() can take the same string for input and output
262  output = L"  This is a test \r\n";
263  EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
264  EXPECT_EQ(L"This is a test", output);
265
266  // Once more, but with a string of whitespace
267  output = L"  \r\n";
268  EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
269  EXPECT_EQ(L"", output);
270
271  std::string output_ascii;
272  for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
273    const trim_case_ascii& value = trim_cases_ascii[i];
274    EXPECT_EQ(value.return_value,
275              TrimWhitespace(value.input, value.positions, &output_ascii));
276    EXPECT_EQ(value.output, output_ascii);
277  }
278}
279
// Wide-string cases consumed by the CollapseWhitespace test.
static const struct collapse_case {
  // String passed to CollapseWhitespace().
  const wchar_t* input;
  // Second argument passed to CollapseWhitespace(). Judging by the expected
  // outputs below, when true a whitespace run that contains a line break is
  // dropped entirely rather than collapsed to one space -- confirm against
  // the CollapseWhitespace() declaration.
  const bool trim;
  // Expected return value.
  const wchar_t* output;
} collapse_cases[] = {
  {L" Google Video ", false, L"Google Video"},
  {L"Google Video", false, L"Google Video"},
  {L"", false, L""},
  {L"  ", false, L""},
  {L"\t\rTest String\n", false, L"Test String"},
  // Non-ASCII whitespace (U+2002, U+00A0, U+3000, U+1680, U+2028).
  {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
  {L"    Test     \n  \t String    ", false, L"Test String"},
  {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
  {L"   Test String", false, L"Test String"},
  {L"Test String    ", false, L"Test String"},
  {L"Test String", false, L"Test String"},
  {L"", true, L""},
  {L"\n", true, L""},
  {L"  \r  ", true, L""},
  {L"\nFoo", true, L"Foo"},
  {L"\r  Foo  ", true, L"Foo"},
  {L" Foo bar ", true, L"Foo bar"},
  {L"  \tFoo  bar  \n", true, L"Foo bar"},
  {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
305
306TEST(StringUtilTest, CollapseWhitespace) {
307  for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
308    const collapse_case& value = collapse_cases[i];
309    EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim));
310  }
311}
312
// Narrow-string (char) cases consumed by the CollapseWhitespaceASCII test.
static const struct collapse_case_ascii {
  // String passed to CollapseWhitespaceASCII().
  const char* input;
  // Second argument passed to CollapseWhitespaceASCII(). Judging by the
  // expected outputs below, when true a whitespace run that contains a line
  // break is dropped entirely rather than collapsed to one space -- confirm
  // against the CollapseWhitespaceASCII() declaration.
  const bool trim;
  // Expected return value.
  const char* output;
} collapse_cases_ascii[] = {
  {" Google Video ", false, "Google Video"},
  {"Google Video", false, "Google Video"},
  {"", false, ""},
  {"  ", false, ""},
  {"\t\rTest String\n", false, "Test String"},
  {"    Test     \n  \t String    ", false, "Test String"},
  {"   Test String", false, "Test String"},
  {"Test String    ", false, "Test String"},
  {"Test String", false, "Test String"},
  {"", true, ""},
  {"\n", true, ""},
  {"  \r  ", true, ""},
  {"\nFoo", true, "Foo"},
  {"\r  Foo  ", true, "Foo"},
  {" Foo bar ", true, "Foo bar"},
  {"  \tFoo  bar  \n", true, "Foo bar"},
  {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
};
336
337TEST(StringUtilTest, CollapseWhitespaceASCII) {
338  for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
339    const collapse_case_ascii& value = collapse_cases_ascii[i];
340    EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
341  }
342}
343
344TEST(StringUtilTest, ContainsOnlyWhitespaceASCII) {
345  EXPECT_TRUE(ContainsOnlyWhitespaceASCII(""));
346  EXPECT_TRUE(ContainsOnlyWhitespaceASCII(" "));
347  EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t"));
348  EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t \r \n  "));
349  EXPECT_FALSE(ContainsOnlyWhitespaceASCII("a"));
350  EXPECT_FALSE(ContainsOnlyWhitespaceASCII("\thello\r \n  "));
351}
352
353TEST(StringUtilTest, ContainsOnlyWhitespace) {
354  EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("")));
355  EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(" ")));
356  EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t")));
357  EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t \r \n  ")));
358  EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("a")));
359  EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("\thello\r \n  ")));
360}
361
362TEST(StringUtilTest, IsStringUTF8) {
363  EXPECT_TRUE(IsStringUTF8("abc"));
364  EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
365  EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
366  EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
367  EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
368  EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM
369
370  // surrogate code points
371  EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
372  EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
373  EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
374
375  // overlong sequences
376  EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
377  EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "AB"
378  EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
379  EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
380  EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
381  EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
382  EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
383  EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
384  EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
385  EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
386  EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+00A5
387
388  // Beyond U+10FFFF (the upper limit of Unicode codespace)
389  EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
390  EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
391  EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes
392
393  // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
394  EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
395  EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
396  EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
397  EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
398
399  // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
400  EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE)
401  EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));  // U+1FFFE
402  EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+10FFFF
403  EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
404  EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF
405  // Strings in legacy encodings. We can certainly make up strings
406  // in a legacy encoding that are valid in UTF-8, but in real data,
407  // most of them are invalid as UTF-8.
408  EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
409  EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC001 in EUC-KR
410  EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
411  // "abc" with U+201[CD] in windows-125[0-8]
412  EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
413  // U+0639 U+064E U+0644 U+064E in ISO-8859-6
414  EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
415  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
416  EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
417}
418
419TEST(StringUtilTest, ConvertASCII) {
420  static const char* char_cases[] = {
421    "Google Video",
422    "Hello, world\n",
423    "0123ABCDwxyz \a\b\t\r\n!+,.~"
424  };
425
426  static const wchar_t* const wchar_cases[] = {
427    L"Google Video",
428    L"Hello, world\n",
429    L"0123ABCDwxyz \a\b\t\r\n!+,.~"
430  };
431
432  for (size_t i = 0; i < arraysize(char_cases); ++i) {
433    EXPECT_TRUE(IsStringASCII(char_cases[i]));
434    std::wstring wide = ASCIIToWide(char_cases[i]);
435    EXPECT_EQ(wchar_cases[i], wide);
436
437    EXPECT_TRUE(IsStringASCII(wchar_cases[i]));
438    std::string ascii = WideToASCII(wchar_cases[i]);
439    EXPECT_EQ(char_cases[i], ascii);
440  }
441
442  EXPECT_FALSE(IsStringASCII("Google \x80Video"));
443  EXPECT_FALSE(IsStringASCII(L"Google \x80Video"));
444
445  // Convert empty strings.
446  std::wstring wempty;
447  std::string empty;
448  EXPECT_EQ(empty, WideToASCII(wempty));
449  EXPECT_EQ(wempty, ASCIIToWide(empty));
450
451  // Convert strings with an embedded NUL character.
452  const char chars_with_nul[] = "test\0string";
453  const int length_with_nul = arraysize(chars_with_nul) - 1;
454  std::string string_with_nul(chars_with_nul, length_with_nul);
455  std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
456  EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),
457            wide_with_nul.length());
458  std::string narrow_with_nul = WideToASCII(wide_with_nul);
459  EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
460            narrow_with_nul.length());
461  EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
462}
463
464TEST(StringUtilTest, ToUpperASCII) {
465  EXPECT_EQ('C', ToUpperASCII('C'));
466  EXPECT_EQ('C', ToUpperASCII('c'));
467  EXPECT_EQ('2', ToUpperASCII('2'));
468
469  EXPECT_EQ(L'C', ToUpperASCII(L'C'));
470  EXPECT_EQ(L'C', ToUpperASCII(L'c'));
471  EXPECT_EQ(L'2', ToUpperASCII(L'2'));
472
473  std::string in_place_a("Cc2");
474  StringToUpperASCII(&in_place_a);
475  EXPECT_EQ("CC2", in_place_a);
476
477  std::wstring in_place_w(L"Cc2");
478  StringToUpperASCII(&in_place_w);
479  EXPECT_EQ(L"CC2", in_place_w);
480
481  std::string original_a("Cc2");
482  std::string upper_a = StringToUpperASCII(original_a);
483  EXPECT_EQ("CC2", upper_a);
484
485  std::wstring original_w(L"Cc2");
486  std::wstring upper_w = StringToUpperASCII(original_w);
487  EXPECT_EQ(L"CC2", upper_w);
488}
489
// Cases consumed by the LowerCaseEqualsASCII test. Every entry is expected
// to compare equal.
static const struct {
  const wchar_t* src_w;  // Wide form of the string under test.
  const char*    src_a;  // Narrow form of the same string.
  const char*    dst;    // Lower-case ASCII text it is compared against.
} lowercase_cases[] = {
  {L"FoO", "FoO", "foo"},
  {L"foo", "foo", "foo"},
  {L"FOO", "FOO", "foo"},
};
499
500TEST(StringUtilTest, LowerCaseEqualsASCII) {
501  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) {
502    EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w,
503                                     lowercase_cases[i].dst));
504    EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
505                                     lowercase_cases[i].dst));
506  }
507}
508
509TEST(StringUtilTest, GetByteDisplayUnits) {
510  static const struct {
511    int64 bytes;
512    DataUnits expected;
513  } cases[] = {
514    {0, DATA_UNITS_BYTE},
515    {512, DATA_UNITS_BYTE},
516    {10*1024, DATA_UNITS_KIBIBYTE},
517    {10*1024*1024, DATA_UNITS_MEBIBYTE},
518    {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE},
519    {~(1LL<<63), DATA_UNITS_GIBIBYTE},
520#ifdef NDEBUG
521    {-1, DATA_UNITS_BYTE},
522#endif
523  };
524
525  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
526    EXPECT_EQ(cases[i].expected, GetByteDisplayUnits(cases[i].bytes));
527}
528
529TEST(StringUtilTest, FormatBytes) {
530  static const struct {
531    int64 bytes;
532    DataUnits units;
533    const wchar_t* expected;
534    const wchar_t* expected_with_units;
535  } cases[] = {
536    // Expected behavior: we show one post-decimal digit when we have
537    // under two pre-decimal digits, except in cases where it makes no
538    // sense (zero or bytes).
539    // Since we switch units once we cross the 1000 mark, this keeps
540    // the display of file sizes or bytes consistently around three
541    // digits.
542    {0, DATA_UNITS_BYTE, L"0", L"0 B"},
543    {512, DATA_UNITS_BYTE, L"512", L"512 B"},
544    {512, DATA_UNITS_KIBIBYTE, L"0.5", L"0.5 kB"},
545    {1024*1024, DATA_UNITS_KIBIBYTE, L"1024", L"1024 kB"},
546    {1024*1024, DATA_UNITS_MEBIBYTE, L"1.0", L"1.0 MB"},
547    {1024*1024*1024, DATA_UNITS_GIBIBYTE, L"1.0", L"1.0 GB"},
548    {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, L"10.0", L"10.0 GB"},
549    {99LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, L"99.0", L"99.0 GB"},
550    {105LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, L"105", L"105 GB"},
551    {105LL*1024*1024*1024 + 500LL*1024*1024, DATA_UNITS_GIBIBYTE,
552     L"105", L"105 GB"},
553    {~(1LL<<63), DATA_UNITS_GIBIBYTE, L"8589934592", L"8589934592 GB"},
554
555    {99*1024 + 103, DATA_UNITS_KIBIBYTE, L"99.1", L"99.1 kB"},
556    {1024*1024 + 103, DATA_UNITS_KIBIBYTE, L"1024", L"1024 kB"},
557    {1024*1024 + 205 * 1024, DATA_UNITS_MEBIBYTE, L"1.2", L"1.2 MB"},
558    {1024*1024*1024 + (927 * 1024*1024), DATA_UNITS_GIBIBYTE,
559     L"1.9", L"1.9 GB"},
560    {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, L"10.0", L"10.0 GB"},
561    {100LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, L"100", L"100 GB"},
562#ifdef NDEBUG
563    {-1, DATA_UNITS_BYTE, L"", L""},
564#endif
565  };
566
567  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
568    EXPECT_EQ(cases[i].expected,
569              FormatBytes(cases[i].bytes, cases[i].units, false));
570    EXPECT_EQ(cases[i].expected_with_units,
571              FormatBytes(cases[i].bytes, cases[i].units, true));
572  }
573}
574
575TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
576  static const struct {
577    const char* str;
578    string16::size_type start_offset;
579    const char* find_this;
580    const char* replace_with;
581    const char* expected;
582  } cases[] = {
583    {"aaa", 0, "a", "b", "bbb"},
584    {"abb", 0, "ab", "a", "ab"},
585    {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
586    {"Not found", 0, "x", "0", "Not found"},
587    {"Not found again", 5, "x", "0", "Not found again"},
588    {" Making it much longer ", 0, " ", "Four score and seven years ago",
589     "Four score and seven years agoMakingFour score and seven years agoit"
590     "Four score and seven years agomuchFour score and seven years agolonger"
591     "Four score and seven years ago"},
592    {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
593    {"Replace me only me once", 9, "me ", "", "Replace me only once"},
594    {"abababab", 2, "ab", "c", "abccc"},
595  };
596
597  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
598    string16 str = ASCIIToUTF16(cases[i].str);
599    ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
600                                 ASCIIToUTF16(cases[i].find_this),
601                                 ASCIIToUTF16(cases[i].replace_with));
602    EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
603  }
604}
605
606TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
607  static const struct {
608    const char* str;
609    string16::size_type start_offset;
610    const char* find_this;
611    const char* replace_with;
612    const char* expected;
613  } cases[] = {
614    {"aaa", 0, "a", "b", "baa"},
615    {"abb", 0, "ab", "a", "ab"},
616    {"Removing some substrings inging", 0, "ing", "",
617      "Remov some substrings inging"},
618    {"Not found", 0, "x", "0", "Not found"},
619    {"Not found again", 5, "x", "0", "Not found again"},
620    {" Making it much longer ", 0, " ", "Four score and seven years ago",
621     "Four score and seven years agoMaking it much longer "},
622    {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
623    {"Replace me only me once", 4, "me ", "", "Replace only me once"},
624    {"abababab", 2, "ab", "c", "abcabab"},
625  };
626
627  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
628    string16 str = ASCIIToUTF16(cases[i].str);
629    ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
630                                     ASCIIToUTF16(cases[i].find_this),
631                                     ASCIIToUTF16(cases[i].replace_with));
632    EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
633  }
634}
635
namespace {

// One row of an integer-to-string test table, parameterized on the integer
// type being converted.
template <typename IntType>
struct IntToStringTest {
  // Value to convert.
  IntType num;
  // Expected text from the signed conversion functions.
  const char* sexpected;
  // Expected text from the unsigned conversion functions.
  const char* uexpected;
};

}  // namespace
646
647TEST(StringUtilTest, IntToString) {
648  static const IntToStringTest<int> int_tests[] = {
649      { 0, "0", "0" },
650      { -1, "-1", "4294967295" },
651      { std::numeric_limits<int>::max(), "2147483647", "2147483647" },
652      { std::numeric_limits<int>::min(), "-2147483648", "2147483648" },
653  };
654  static const IntToStringTest<int64> int64_tests[] = {
655      { 0, "0", "0" },
656      { -1, "-1", "18446744073709551615" },
657      { std::numeric_limits<int64>::max(),
658        "9223372036854775807",
659        "9223372036854775807", },
660      { std::numeric_limits<int64>::min(),
661        "-9223372036854775808",
662        "9223372036854775808" },
663  };
664
665  for (size_t i = 0; i < arraysize(int_tests); ++i) {
666    const IntToStringTest<int>* test = &int_tests[i];
667    EXPECT_EQ(IntToString(test->num), test->sexpected);
668    EXPECT_EQ(IntToWString(test->num), UTF8ToWide(test->sexpected));
669    EXPECT_EQ(UintToString(test->num), test->uexpected);
670    EXPECT_EQ(UintToWString(test->num), UTF8ToWide(test->uexpected));
671  }
672  for (size_t i = 0; i < arraysize(int64_tests); ++i) {
673    const IntToStringTest<int64>* test = &int64_tests[i];
674    EXPECT_EQ(Int64ToString(test->num), test->sexpected);
675    EXPECT_EQ(Int64ToWString(test->num), UTF8ToWide(test->sexpected));
676    EXPECT_EQ(Uint64ToString(test->num), test->uexpected);
677    EXPECT_EQ(Uint64ToWString(test->num), UTF8ToWide(test->uexpected));
678  }
679}
680
681TEST(StringUtilTest, Uint64ToString) {
682  static const struct {
683    uint64 input;
684    std::string output;
685  } cases[] = {
686    {0, "0"},
687    {42, "42"},
688    {INT_MAX, "2147483647"},
689    {kuint64max, "18446744073709551615"},
690  };
691
692  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
693    EXPECT_EQ(cases[i].output, Uint64ToString(cases[i].input));
694}
695
696TEST(StringUtilTest, StringToInt) {
697  static const struct {
698    std::string input;
699    int output;
700    bool success;
701  } cases[] = {
702    {"0", 0, true},
703    {"42", 42, true},
704    {"-2147483648", INT_MIN, true},
705    {"2147483647", INT_MAX, true},
706    {"", 0, false},
707    {" 42", 42, false},
708    {"42 ", 42, false},
709    {"\t\n\v\f\r 42", 42, false},
710    {"blah42", 0, false},
711    {"42blah", 42, false},
712    {"blah42blah", 0, false},
713    {"-273.15", -273, false},
714    {"+98.6", 98, false},
715    {"--123", 0, false},
716    {"++123", 0, false},
717    {"-+123", 0, false},
718    {"+-123", 0, false},
719    {"-", 0, false},
720    {"-2147483649", INT_MIN, false},
721    {"-99999999999", INT_MIN, false},
722    {"2147483648", INT_MAX, false},
723    {"99999999999", INT_MAX, false},
724  };
725
726  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
727    EXPECT_EQ(cases[i].output, StringToInt(cases[i].input));
728    int output;
729    EXPECT_EQ(cases[i].success, StringToInt(cases[i].input, &output));
730    EXPECT_EQ(cases[i].output, output);
731
732    std::wstring wide_input = ASCIIToWide(cases[i].input);
733    EXPECT_EQ(cases[i].output, StringToInt(WideToUTF16Hack(wide_input)));
734    EXPECT_EQ(cases[i].success, StringToInt(WideToUTF16Hack(wide_input),
735                                            &output));
736    EXPECT_EQ(cases[i].output, output);
737  }
738
739  // One additional test to verify that conversion of numbers in strings with
740  // embedded NUL characters.  The NUL and extra data after it should be
741  // interpreted as junk after the number.
742  const char input[] = "6\06";
743  std::string input_string(input, arraysize(input) - 1);
744  int output;
745  EXPECT_FALSE(StringToInt(input_string, &output));
746  EXPECT_EQ(6, output);
747
748  std::wstring wide_input = ASCIIToWide(input_string);
749  EXPECT_FALSE(StringToInt(WideToUTF16Hack(wide_input), &output));
750  EXPECT_EQ(6, output);
751}
752
753TEST(StringUtilTest, StringToInt64) {
754  static const struct {
755    std::string input;
756    int64 output;
757    bool success;
758  } cases[] = {
759    {"0", 0, true},
760    {"42", 42, true},
761    {"-2147483648", INT_MIN, true},
762    {"2147483647", INT_MAX, true},
763    {"-2147483649", GG_INT64_C(-2147483649), true},
764    {"-99999999999", GG_INT64_C(-99999999999), true},
765    {"2147483648", GG_INT64_C(2147483648), true},
766    {"99999999999", GG_INT64_C(99999999999), true},
767    {"9223372036854775807", kint64max, true},
768    {"-9223372036854775808", kint64min, true},
769    {"09", 9, true},
770    {"-09", -9, true},
771    {"", 0, false},
772    {" 42", 42, false},
773    {"42 ", 42, false},
774    {"\t\n\v\f\r 42", 42, false},
775    {"blah42", 0, false},
776    {"42blah", 42, false},
777    {"blah42blah", 0, false},
778    {"-273.15", -273, false},
779    {"+98.6", 98, false},
780    {"--123", 0, false},
781    {"++123", 0, false},
782    {"-+123", 0, false},
783    {"+-123", 0, false},
784    {"-", 0, false},
785    {"-9223372036854775809", kint64min, false},
786    {"-99999999999999999999", kint64min, false},
787    {"9223372036854775808", kint64max, false},
788    {"99999999999999999999", kint64max, false},
789  };
790
791  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
792    EXPECT_EQ(cases[i].output, StringToInt64(cases[i].input));
793    int64 output;
794    EXPECT_EQ(cases[i].success, StringToInt64(cases[i].input, &output));
795    EXPECT_EQ(cases[i].output, output);
796
797    std::wstring wide_input = ASCIIToWide(cases[i].input);
798    EXPECT_EQ(cases[i].output, StringToInt64(WideToUTF16Hack(wide_input)));
799    EXPECT_EQ(cases[i].success, StringToInt64(WideToUTF16Hack(wide_input),
800                                              &output));
801    EXPECT_EQ(cases[i].output, output);
802  }
803
804  // One additional test to verify that conversion of numbers in strings with
805  // embedded NUL characters.  The NUL and extra data after it should be
806  // interpreted as junk after the number.
807  const char input[] = "6\06";
808  std::string input_string(input, arraysize(input) - 1);
809  int64 output;
810  EXPECT_FALSE(StringToInt64(input_string, &output));
811  EXPECT_EQ(6, output);
812
813  std::wstring wide_input = ASCIIToWide(input_string);
814  EXPECT_FALSE(StringToInt64(WideToUTF16Hack(wide_input), &output));
815  EXPECT_EQ(6, output);
816}
817
818TEST(StringUtilTest, HexStringToInt) {
819  static const struct {
820    std::string input;
821    int output;
822    bool success;
823  } cases[] = {
824    {"0", 0, true},
825    {"42", 66, true},
826    {"-42", -66, true},
827    {"+42", 66, true},
828    {"7fffffff", INT_MAX, true},
829    {"80000000", INT_MIN, true},
830    {"ffffffff", -1, true},
831    {"DeadBeef", 0xdeadbeef, true},
832    {"0x42", 66, true},
833    {"-0x42", -66, true},
834    {"+0x42", 66, true},
835    {"0x7fffffff", INT_MAX, true},
836    {"0x80000000", INT_MIN, true},
837    {"0xffffffff", -1, true},
838    {"0XDeadBeef", 0xdeadbeef, true},
839    {"0x0f", 15, true},
840    {"0f", 15, true},
841    {" 45", 0x45, false},
842    {"\t\n\v\f\r 0x45", 0x45, false},
843    {" 45", 0x45, false},
844    {"45 ", 0x45, false},
845    {"efgh", 0xef, false},
846    {"0xefgh", 0xef, false},
847    {"hgfe", 0, false},
848    {"100000000", -1, false},  // don't care about |output|, just |success|
849    {"-", 0, false},
850    {"", 0, false},
851  };
852
853  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
854    EXPECT_EQ(cases[i].output, HexStringToInt(cases[i].input));
855    int output;
856    EXPECT_EQ(cases[i].success, HexStringToInt(cases[i].input, &output));
857    EXPECT_EQ(cases[i].output, output);
858
859    std::wstring wide_input = ASCIIToWide(cases[i].input);
860    EXPECT_EQ(cases[i].output, HexStringToInt(WideToUTF16Hack(wide_input)));
861    EXPECT_EQ(cases[i].success, HexStringToInt(WideToUTF16Hack(wide_input),
862                                               &output));
863    EXPECT_EQ(cases[i].output, output);
864  }
865  // One additional test to verify that conversion of numbers in strings with
866  // embedded NUL characters.  The NUL and extra data after it should be
867  // interpreted as junk after the number.
868  const char input[] = "0xc0ffee\09";
869  std::string input_string(input, arraysize(input) - 1);
870  int output;
871  EXPECT_FALSE(HexStringToInt(input_string, &output));
872  EXPECT_EQ(0xc0ffee, output);
873
874  std::wstring wide_input = ASCIIToWide(input_string);
875  EXPECT_FALSE(HexStringToInt(WideToUTF16Hack(wide_input), &output));
876  EXPECT_EQ(0xc0ffee, output);
877}
878
879TEST(StringUtilTest, HexStringToBytes) {
880  static const struct {
881    const std::string input;
882    const char* output;
883    size_t output_len;
884    bool success;
885  } cases[] = {
886    {"0", "", 0, false},  // odd number of characters fails
887    {"00", "\0", 1, true},
888    {"42", "\x42", 1, true},
889    {"-42", "", 0, false},  // any non-hex value fails
890    {"+42", "", 0, false},
891    {"7fffffff", "\x7f\xff\xff\xff", 4, true},
892    {"80000000", "\x80\0\0\0", 4, true},
893    {"deadbeef", "\xde\xad\xbe\xef", 4, true},
894    {"DeadBeef", "\xde\xad\xbe\xef", 4, true},
895    {"0x42", "", 0, false},  // leading 0x fails (x is not hex)
896    {"0f", "\xf", 1, true},
897    {"45  ", "\x45", 1, false},
898    {"efgh", "\xef", 1, false},
899    {"", "", 0, false},
900    {"0123456789ABCDEF", "\x01\x23\x45\x67\x89\xAB\xCD\xEF", 8, true},
901    {"0123456789ABCDEF012345",
902     "\x01\x23\x45\x67\x89\xAB\xCD\xEF\x01\x23\x45", 11, true},
903  };
904
905
906  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
907    std::vector<uint8> output;
908    std::vector<uint8> compare;
909    EXPECT_EQ(cases[i].success, HexStringToBytes(cases[i].input, &output)) <<
910        i << ": " << cases[i].input;
911    for (size_t j = 0; j < cases[i].output_len; ++j)
912      compare.push_back(static_cast<uint8>(cases[i].output[j]));
913    ASSERT_EQ(output.size(), compare.size()) << i << ": " << cases[i].input;
914    EXPECT_TRUE(std::equal(output.begin(), output.end(), compare.begin())) <<
915        i << ": " << cases[i].input;
916
917    output.clear();
918    compare.clear();
919
920    std::wstring wide_input = ASCIIToWide(cases[i].input);
921    EXPECT_EQ(cases[i].success,
922              HexStringToBytes(WideToUTF16Hack(wide_input), &output)) <<
923        i << ": " << cases[i].input;
924    for (size_t j = 0; j < cases[i].output_len; ++j)
925      compare.push_back(static_cast<uint8>(cases[i].output[j]));
926    ASSERT_EQ(output.size(), compare.size()) << i << ": " << cases[i].input;
927    EXPECT_TRUE(std::equal(output.begin(), output.end(), compare.begin())) <<
928        i << ": " << cases[i].input;
929  }
930}
931
932TEST(StringUtilTest, StringToDouble) {
933  static const struct {
934    std::string input;
935    double output;
936    bool success;
937  } cases[] = {
938    {"0", 0.0, true},
939    {"42", 42.0, true},
940    {"-42", -42.0, true},
941    {"123.45", 123.45, true},
942    {"-123.45", -123.45, true},
943    {"+123.45", 123.45, true},
944    {"2.99792458e8", 299792458.0, true},
945    {"149597870.691E+3", 149597870691.0, true},
946    {"6.", 6.0, true},
947    {"9e99999999999999999999", HUGE_VAL, false},
948    {"-9e99999999999999999999", -HUGE_VAL, false},
949    {"1e-2", 0.01, true},
950    {" 1e-2", 0.01, false},
951    {"1e-2 ", 0.01, false},
952    {"-1E-7", -0.0000001, true},
953    {"01e02", 100, true},
954    {"2.3e15", 2.3e15, true},
955    {"\t\n\v\f\r -123.45e2", -12345.0, false},
956    {"+123 e4", 123.0, false},
957    {"123e ", 123.0, false},
958    {"123e", 123.0, false},
959    {" 2.99", 2.99, false},
960    {"1e3.4", 1000.0, false},
961    {"nothing", 0.0, false},
962    {"-", 0.0, false},
963    {"+", 0.0, false},
964    {"", 0.0, false},
965  };
966
967  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
968    EXPECT_DOUBLE_EQ(cases[i].output, StringToDouble(cases[i].input));
969    double output;
970    EXPECT_EQ(cases[i].success, StringToDouble(cases[i].input, &output));
971    EXPECT_DOUBLE_EQ(cases[i].output, output);
972
973    std::wstring wide_input = ASCIIToWide(cases[i].input);
974    EXPECT_DOUBLE_EQ(cases[i].output,
975                     StringToDouble(WideToUTF16Hack(wide_input)));
976    EXPECT_EQ(cases[i].success, StringToDouble(WideToUTF16Hack(wide_input),
977                                               &output));
978    EXPECT_DOUBLE_EQ(cases[i].output, output);
979  }
980
981  // One additional test to verify that conversion of numbers in strings with
982  // embedded NUL characters.  The NUL and extra data after it should be
983  // interpreted as junk after the number.
984  const char input[] = "3.14\0159";
985  std::string input_string(input, arraysize(input) - 1);
986  double output;
987  EXPECT_FALSE(StringToDouble(input_string, &output));
988  EXPECT_DOUBLE_EQ(3.14, output);
989
990  std::wstring wide_input = ASCIIToWide(input_string);
991  EXPECT_FALSE(StringToDouble(WideToUTF16Hack(wide_input), &output));
992  EXPECT_DOUBLE_EQ(3.14, output);
993}
994
995// This checks where we can use the assignment operator for a va_list. We need
996// a way to do this since Visual C doesn't support va_copy, but assignment on
997// va_list is not guaranteed to be a copy. See StringAppendVT which uses this
998// capability.
999static void VariableArgsFunc(const char* format, ...) {
1000  va_list org;
1001  va_start(org, format);
1002
1003  va_list dup;
1004  GG_VA_COPY(dup, org);
1005  int i1 = va_arg(org, int);
1006  int j1 = va_arg(org, int);
1007  char* s1 = va_arg(org, char*);
1008  double d1 = va_arg(org, double);
1009  va_end(org);
1010
1011  int i2 = va_arg(dup, int);
1012  int j2 = va_arg(dup, int);
1013  char* s2 = va_arg(dup, char*);
1014  double d2 = va_arg(dup, double);
1015
1016  EXPECT_EQ(i1, i2);
1017  EXPECT_EQ(j1, j2);
1018  EXPECT_STREQ(s1, s2);
1019  EXPECT_EQ(d1, d2);
1020
1021  va_end(dup);
1022}
1023
// Drives VariableArgsFunc() with the argument sequence it reads back:
// two ints, a C string and a double.
TEST(StringUtilTest, VAList) {
  VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
}
1027
1028TEST(StringUtilTest, StringPrintfEmpty) {
1029  EXPECT_EQ("", StringPrintf("%s", ""));
1030}
1031
1032TEST(StringUtilTest, StringPrintfMisc) {
1033  EXPECT_EQ("123hello w", StringPrintf("%3d%2s %1c", 123, "hello", 'w'));
1034  EXPECT_EQ(L"123hello w", StringPrintf(L"%3d%2ls %1lc", 123, L"hello", 'w'));
1035}
1036
1037TEST(StringUtilTest, StringAppendfEmptyString) {
1038  std::string value("Hello");
1039  StringAppendF(&value, "%s", "");
1040  EXPECT_EQ("Hello", value);
1041
1042  std::wstring valuew(L"Hello");
1043  StringAppendF(&valuew, L"%ls", L"");
1044  EXPECT_EQ(L"Hello", valuew);
1045}
1046
1047TEST(StringUtilTest, StringAppendfString) {
1048  std::string value("Hello");
1049  StringAppendF(&value, " %s", "World");
1050  EXPECT_EQ("Hello World", value);
1051
1052  std::wstring valuew(L"Hello");
1053  StringAppendF(&valuew, L" %ls", L"World");
1054  EXPECT_EQ(L"Hello World", valuew);
1055}
1056
1057TEST(StringUtilTest, StringAppendfInt) {
1058  std::string value("Hello");
1059  StringAppendF(&value, " %d", 123);
1060  EXPECT_EQ("Hello 123", value);
1061
1062  std::wstring valuew(L"Hello");
1063  StringAppendF(&valuew, L" %d", 123);
1064  EXPECT_EQ(L"Hello 123", valuew);
1065}
1066
1067// Make sure that lengths exactly around the initial buffer size are handled
1068// correctly.
1069TEST(StringUtilTest, StringPrintfBounds) {
1070  const int kSrcLen = 1026;
1071  char src[kSrcLen];
1072  for (size_t i = 0; i < arraysize(src); i++)
1073    src[i] = 'A';
1074
1075  wchar_t srcw[kSrcLen];
1076  for (size_t i = 0; i < arraysize(srcw); i++)
1077    srcw[i] = 'A';
1078
1079  for (int i = 1; i < 3; i++) {
1080    src[kSrcLen - i] = 0;
1081    std::string out;
1082    SStringPrintf(&out, "%s", src);
1083    EXPECT_STREQ(src, out.c_str());
1084
1085    srcw[kSrcLen - i] = 0;
1086    std::wstring outw;
1087    SStringPrintf(&outw, L"%ls", srcw);
1088    EXPECT_STREQ(srcw, outw.c_str());
1089  }
1090}
1091
1092// Test very large sprintfs that will cause the buffer to grow.
1093TEST(StringUtilTest, Grow) {
1094  char src[1026];
1095  for (size_t i = 0; i < arraysize(src); i++)
1096    src[i] = 'A';
1097  src[1025] = 0;
1098
1099  const char* fmt = "%sB%sB%sB%sB%sB%sB%s";
1100
1101  std::string out;
1102  SStringPrintf(&out, fmt, src, src, src, src, src, src, src);
1103
1104  const int kRefSize = 320000;
1105  char* ref = new char[kRefSize];
1106#if defined(OS_WIN)
1107  sprintf_s(ref, kRefSize, fmt, src, src, src, src, src, src, src);
1108#elif defined(OS_POSIX)
1109  snprintf(ref, kRefSize, fmt, src, src, src, src, src, src, src);
1110#endif
1111
1112  EXPECT_STREQ(ref, out.c_str());
1113  delete[] ref;
1114}
1115
1116// A helper for the StringAppendV test that follows.
1117// Just forwards its args to StringAppendV.
1118static void StringAppendVTestHelper(std::string* out,
1119                                    const char* format,
1120                                    ...) PRINTF_FORMAT(2, 3);
1121
1122static void StringAppendVTestHelper(std::string* out, const char* format, ...) {
1123  va_list ap;
1124  va_start(ap, format);
1125  StringAppendV(out, format, ap);
1126  va_end(ap);
1127}
1128
1129TEST(StringUtilTest, StringAppendV) {
1130  std::string out;
1131  StringAppendVTestHelper(&out, "%d foo %s", 1, "bar");
1132  EXPECT_EQ("1 foo bar", out);
1133}
1134
1135// Test the boundary condition for the size of the string_util's
1136// internal buffer.
1137TEST(StringUtilTest, GrowBoundary) {
1138  const int string_util_buf_len = 1024;
1139  // Our buffer should be one larger than the size of StringAppendVT's stack
1140  // buffer.
1141  const int buf_len = string_util_buf_len + 1;
1142  char src[buf_len + 1];  // Need extra one for NULL-terminator.
1143  for (int i = 0; i < buf_len; ++i)
1144    src[i] = 'a';
1145  src[buf_len] = 0;
1146
1147  std::string out;
1148  SStringPrintf(&out, "%s", src);
1149
1150  EXPECT_STREQ(src, out.c_str());
1151}
1152
// TODO(evanm): what's the proper cross-platform test here?
#if defined(OS_WIN)
// sprintf in Visual Studio fails when given U+FFFF. This tests that the
// failure case is gracefully handled: the output is expected to be empty.
TEST(StringUtilTest, Invalid) {
  // A one-character string holding only U+FFFF.
  const wchar_t invalid[] = { 0xffff, 0 };

  std::wstring formatted;
  SStringPrintf(&formatted, L"%ls", invalid);
  EXPECT_STREQ(L"", formatted.c_str());
}
#endif
1167
1168// Test for SplitString
1169TEST(StringUtilTest, SplitString) {
1170  std::vector<std::wstring> r;
1171
1172  SplitString(L"", L',', &r);
1173  ASSERT_EQ(1U, r.size());
1174  EXPECT_EQ(r[0], L"");
1175  r.clear();
1176
1177  SplitString(L"a,b,c", L',', &r);
1178  ASSERT_EQ(3U, r.size());
1179  EXPECT_EQ(r[0], L"a");
1180  EXPECT_EQ(r[1], L"b");
1181  EXPECT_EQ(r[2], L"c");
1182  r.clear();
1183
1184  SplitString(L"a, b, c", L',', &r);
1185  ASSERT_EQ(3U, r.size());
1186  EXPECT_EQ(r[0], L"a");
1187  EXPECT_EQ(r[1], L"b");
1188  EXPECT_EQ(r[2], L"c");
1189  r.clear();
1190
1191  SplitString(L"a,,c", L',', &r);
1192  ASSERT_EQ(3U, r.size());
1193  EXPECT_EQ(r[0], L"a");
1194  EXPECT_EQ(r[1], L"");
1195  EXPECT_EQ(r[2], L"c");
1196  r.clear();
1197
1198  SplitString(L"", L'*', &r);
1199  ASSERT_EQ(1U, r.size());
1200  EXPECT_EQ(r[0], L"");
1201  r.clear();
1202
1203  SplitString(L"foo", L'*', &r);
1204  ASSERT_EQ(1U, r.size());
1205  EXPECT_EQ(r[0], L"foo");
1206  r.clear();
1207
1208  SplitString(L"foo ,", L',', &r);
1209  ASSERT_EQ(2U, r.size());
1210  EXPECT_EQ(r[0], L"foo");
1211  EXPECT_EQ(r[1], L"");
1212  r.clear();
1213
1214  SplitString(L",", L',', &r);
1215  ASSERT_EQ(2U, r.size());
1216  EXPECT_EQ(r[0], L"");
1217  EXPECT_EQ(r[1], L"");
1218  r.clear();
1219
1220  SplitString(L"\t\ta\t", L'\t', &r);
1221  ASSERT_EQ(4U, r.size());
1222  EXPECT_EQ(r[0], L"");
1223  EXPECT_EQ(r[1], L"");
1224  EXPECT_EQ(r[2], L"a");
1225  EXPECT_EQ(r[3], L"");
1226  r.clear();
1227
1228  SplitStringDontTrim(L"\t\ta\t", L'\t', &r);
1229  ASSERT_EQ(4U, r.size());
1230  EXPECT_EQ(r[0], L"");
1231  EXPECT_EQ(r[1], L"");
1232  EXPECT_EQ(r[2], L"a");
1233  EXPECT_EQ(r[3], L"");
1234  r.clear();
1235
1236  SplitString(L"\ta\t\nb\tcc", L'\n', &r);
1237  ASSERT_EQ(2U, r.size());
1238  EXPECT_EQ(r[0], L"a");
1239  EXPECT_EQ(r[1], L"b\tcc");
1240  r.clear();
1241
1242  SplitStringDontTrim(L"\ta\t\nb\tcc", L'\n', &r);
1243  ASSERT_EQ(2U, r.size());
1244  EXPECT_EQ(r[0], L"\ta\t");
1245  EXPECT_EQ(r[1], L"b\tcc");
1246  r.clear();
1247}
1248
1249// Test for Tokenize
1250template <typename STR>
1251void TokenizeTest() {
1252  std::vector<STR> r;
1253  size_t size;
1254
1255  size = Tokenize(STR("This is a string"), STR(" "), &r);
1256  EXPECT_EQ(4U, size);
1257  ASSERT_EQ(4U, r.size());
1258  EXPECT_EQ(r[0], STR("This"));
1259  EXPECT_EQ(r[1], STR("is"));
1260  EXPECT_EQ(r[2], STR("a"));
1261  EXPECT_EQ(r[3], STR("string"));
1262  r.clear();
1263
1264  size = Tokenize(STR("one,two,three"), STR(","), &r);
1265  EXPECT_EQ(3U, size);
1266  ASSERT_EQ(3U, r.size());
1267  EXPECT_EQ(r[0], STR("one"));
1268  EXPECT_EQ(r[1], STR("two"));
1269  EXPECT_EQ(r[2], STR("three"));
1270  r.clear();
1271
1272  size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);
1273  EXPECT_EQ(3U, size);
1274  ASSERT_EQ(3U, r.size());
1275  EXPECT_EQ(r[0], STR("one"));
1276  EXPECT_EQ(r[1], STR("two"));
1277  EXPECT_EQ(r[2], STR("three;four"));
1278  r.clear();
1279
1280  size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);
1281  EXPECT_EQ(4U, size);
1282  ASSERT_EQ(4U, r.size());
1283  EXPECT_EQ(r[0], STR("one"));
1284  EXPECT_EQ(r[1], STR("two"));
1285  EXPECT_EQ(r[2], STR("three"));
1286  EXPECT_EQ(r[3], STR("four"));
1287  r.clear();
1288
1289  size = Tokenize(STR("one, two, three"), STR(","), &r);
1290  EXPECT_EQ(3U, size);
1291  ASSERT_EQ(3U, r.size());
1292  EXPECT_EQ(r[0], STR("one"));
1293  EXPECT_EQ(r[1], STR(" two"));
1294  EXPECT_EQ(r[2], STR(" three"));
1295  r.clear();
1296
1297  size = Tokenize(STR("one, two, three, "), STR(","), &r);
1298  EXPECT_EQ(4U, size);
1299  ASSERT_EQ(4U, r.size());
1300  EXPECT_EQ(r[0], STR("one"));
1301  EXPECT_EQ(r[1], STR(" two"));
1302  EXPECT_EQ(r[2], STR(" three"));
1303  EXPECT_EQ(r[3], STR(" "));
1304  r.clear();
1305
1306  size = Tokenize(STR("one, two, three,"), STR(","), &r);
1307  EXPECT_EQ(3U, size);
1308  ASSERT_EQ(3U, r.size());
1309  EXPECT_EQ(r[0], STR("one"));
1310  EXPECT_EQ(r[1], STR(" two"));
1311  EXPECT_EQ(r[2], STR(" three"));
1312  r.clear();
1313
1314  size = Tokenize(STR(""), STR(","), &r);
1315  EXPECT_EQ(0U, size);
1316  ASSERT_EQ(0U, r.size());
1317  r.clear();
1318
1319  size = Tokenize(STR(","), STR(","), &r);
1320  EXPECT_EQ(0U, size);
1321  ASSERT_EQ(0U, r.size());
1322  r.clear();
1323
1324  size = Tokenize(STR(",;:."), STR(".:;,"), &r);
1325  EXPECT_EQ(0U, size);
1326  ASSERT_EQ(0U, r.size());
1327  r.clear();
1328
1329  size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);
1330  EXPECT_EQ(1U, size);
1331  ASSERT_EQ(1U, r.size());
1332  EXPECT_EQ(r[0], STR("a"));
1333  r.clear();
1334
1335  size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);
1336  EXPECT_EQ(2U, size);
1337  ASSERT_EQ(2U, r.size());
1338  EXPECT_EQ(r[0], STR("\ta\t"));
1339  EXPECT_EQ(r[1], STR("b\tcc"));
1340  r.clear();
1341}
1342
// Runs the shared Tokenize cases with std::string as the string type.
TEST(StringUtilTest, TokenizeStdString) {
  TokenizeTest<std::string>();
}
1346
// Runs the shared Tokenize cases with base::StringPiece as the string type.
TEST(StringUtilTest, TokenizeStringPiece) {
  TokenizeTest<base::StringPiece>();
}
1350
1351// Test for JoinString
1352TEST(StringUtilTest, JoinString) {
1353  std::vector<std::string> in;
1354  EXPECT_EQ("", JoinString(in, ','));
1355
1356  in.push_back("a");
1357  EXPECT_EQ("a", JoinString(in, ','));
1358
1359  in.push_back("b");
1360  in.push_back("c");
1361  EXPECT_EQ("a,b,c", JoinString(in, ','));
1362
1363  in.push_back("");
1364  EXPECT_EQ("a,b,c,", JoinString(in, ','));
1365  in.push_back(" ");
1366  EXPECT_EQ("a|b|c|| ", JoinString(in, '|'));
1367}
1368
1369TEST(StringUtilTest, StartsWith) {
1370  EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
1371  EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
1372  EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
1373  EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
1374  EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
1375  EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
1376  EXPECT_FALSE(StartsWithASCII("", "javascript", false));
1377  EXPECT_FALSE(StartsWithASCII("", "javascript", true));
1378  EXPECT_TRUE(StartsWithASCII("java", "", false));
1379  EXPECT_TRUE(StartsWithASCII("java", "", true));
1380
1381  EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", true));
1382  EXPECT_FALSE(StartsWith(L"JavaScript:url", L"javascript", true));
1383  EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", false));
1384  EXPECT_TRUE(StartsWith(L"JavaScript:url", L"javascript", false));
1385  EXPECT_FALSE(StartsWith(L"java", L"javascript", true));
1386  EXPECT_FALSE(StartsWith(L"java", L"javascript", false));
1387  EXPECT_FALSE(StartsWith(L"", L"javascript", false));
1388  EXPECT_FALSE(StartsWith(L"", L"javascript", true));
1389  EXPECT_TRUE(StartsWith(L"java", L"", false));
1390  EXPECT_TRUE(StartsWith(L"java", L"", true));
1391}
1392
1393TEST(StringUtilTest, EndsWith) {
1394  EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", true));
1395  EXPECT_FALSE(EndsWith(L"Foo.Plugin", L".plugin", true));
1396  EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", false));
1397  EXPECT_TRUE(EndsWith(L"Foo.Plugin", L".plugin", false));
1398  EXPECT_FALSE(EndsWith(L".plug", L".plugin", true));
1399  EXPECT_FALSE(EndsWith(L".plug", L".plugin", false));
1400  EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", true));
1401  EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", false));
1402  EXPECT_FALSE(EndsWith(L"", L".plugin", false));
1403  EXPECT_FALSE(EndsWith(L"", L".plugin", true));
1404  EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", false));
1405  EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", true));
1406  EXPECT_TRUE(EndsWith(L".plugin", L".plugin", false));
1407  EXPECT_TRUE(EndsWith(L".plugin", L".plugin", true));
1408  EXPECT_TRUE(EndsWith(L"", L"", false));
1409  EXPECT_TRUE(EndsWith(L"", L"", true));
1410}
1411
1412TEST(StringUtilTest, GetStringFWithOffsets) {
1413  std::vector<string16> subst;
1414  subst.push_back(ASCIIToUTF16("1"));
1415  subst.push_back(ASCIIToUTF16("2"));
1416  std::vector<size_t> offsets;
1417
1418  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
1419                            subst,
1420                            &offsets);
1421  EXPECT_EQ(2U, offsets.size());
1422  EXPECT_EQ(7U, offsets[0]);
1423  EXPECT_EQ(25U, offsets[1]);
1424  offsets.clear();
1425
1426  ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
1427                            subst,
1428                            &offsets);
1429  EXPECT_EQ(2U, offsets.size());
1430  EXPECT_EQ(25U, offsets[0]);
1431  EXPECT_EQ(7U, offsets[1]);
1432  offsets.clear();
1433}
1434
1435TEST(StringUtilTest, ReplaceStringPlaceholders) {
1436  std::vector<string16> subst;
1437  subst.push_back(ASCIIToUTF16("9a"));
1438  subst.push_back(ASCIIToUTF16("8b"));
1439  subst.push_back(ASCIIToUTF16("7c"));
1440  subst.push_back(ASCIIToUTF16("6d"));
1441  subst.push_back(ASCIIToUTF16("5e"));
1442  subst.push_back(ASCIIToUTF16("4f"));
1443  subst.push_back(ASCIIToUTF16("3g"));
1444  subst.push_back(ASCIIToUTF16("2h"));
1445  subst.push_back(ASCIIToUTF16("1i"));
1446
1447  string16 formatted =
1448      ReplaceStringPlaceholders(
1449          ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);
1450
1451  EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
1452}
1453
1454TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
1455  // Test whether replacestringplaceholders works as expected when there
1456  // are fewer inputs than outputs.
1457  std::vector<string16> subst;
1458  subst.push_back(ASCIIToUTF16("9a"));
1459  subst.push_back(ASCIIToUTF16("8b"));
1460  subst.push_back(ASCIIToUTF16("7c"));
1461
1462  string16 formatted =
1463      ReplaceStringPlaceholders(
1464          ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);
1465
1466  EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
1467}
1468
1469TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
1470  std::vector<std::string> subst;
1471  subst.push_back("9a");
1472  subst.push_back("8b");
1473  subst.push_back("7c");
1474  subst.push_back("6d");
1475  subst.push_back("5e");
1476  subst.push_back("4f");
1477  subst.push_back("3g");
1478  subst.push_back("2h");
1479  subst.push_back("1i");
1480
1481  std::string formatted =
1482      ReplaceStringPlaceholders(
1483          "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);
1484
1485  EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
1486}
1487
1488TEST(StringUtilTest, SplitStringAlongWhitespace) {
1489  struct TestData {
1490    const std::wstring input;
1491    const size_t expected_result_count;
1492    const std::wstring output1;
1493    const std::wstring output2;
1494  } data[] = {
1495    { L"a",       1, L"a",  L""   },
1496    { L" ",       0, L"",   L""   },
1497    { L" a",      1, L"a",  L""   },
1498    { L" ab ",    1, L"ab", L""   },
1499    { L" ab c",   2, L"ab", L"c"  },
1500    { L" ab c ",  2, L"ab", L"c"  },
1501    { L" ab cd",  2, L"ab", L"cd" },
1502    { L" ab cd ", 2, L"ab", L"cd" },
1503    { L" \ta\t",  1, L"a",  L""   },
1504    { L" b\ta\t", 2, L"b",  L"a"  },
1505    { L" b\tat",  2, L"b",  L"at" },
1506    { L"b\tat",   2, L"b",  L"at" },
1507    { L"b\t at",  2, L"b",  L"at" },
1508  };
1509  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) {
1510    std::vector<std::wstring> results;
1511    SplitStringAlongWhitespace(data[i].input, &results);
1512    ASSERT_EQ(data[i].expected_result_count, results.size());
1513    if (data[i].expected_result_count > 0)
1514      ASSERT_EQ(data[i].output1, results[0]);
1515    if (data[i].expected_result_count > 1)
1516      ASSERT_EQ(data[i].output2, results[1]);
1517  }
1518}
1519
1520TEST(StringUtilTest, MatchPatternTest) {
1521  EXPECT_EQ(MatchPatternASCII("www.google.com", "*.com"), true);
1522  EXPECT_EQ(MatchPatternASCII("www.google.com", "*"), true);
1523  EXPECT_EQ(MatchPatternASCII("www.google.com", "www*.g*.org"), false);
1524  EXPECT_EQ(MatchPatternASCII("Hello", "H?l?o"), true);
1525  EXPECT_EQ(MatchPatternASCII("www.google.com", "http://*)"), false);
1526  EXPECT_EQ(MatchPatternASCII("www.msn.com", "*.COM"), false);
1527  EXPECT_EQ(MatchPatternASCII("Hello*1234", "He??o\\*1*"), true);
1528  EXPECT_EQ(MatchPatternASCII("", "*.*"), false);
1529  EXPECT_EQ(MatchPatternASCII("", "*"), true);
1530  EXPECT_EQ(MatchPatternASCII("", "?"), true);
1531  EXPECT_EQ(MatchPatternASCII("", ""), true);
1532  EXPECT_EQ(MatchPatternASCII("Hello", ""), false);
1533  EXPECT_EQ(MatchPatternASCII("Hello*", "Hello*"), true);
1534  // Stop after a certain recursion depth.
1535  EXPECT_EQ(MatchPatternASCII("12345678901234567890", "???????????????????*"),
1536                              false);
1537}
1538
1539TEST(StringUtilTest, LcpyTest) {
1540  // Test the normal case where we fit in our buffer.
1541  {
1542    char dst[10];
1543    wchar_t wdst[10];
1544    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1545    EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1546    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1547    EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1548  }
1549
1550  // Test dst_size == 0, nothing should be written to |dst| and we should
1551  // have the equivalent of strlen(src).
1552  {
1553    char dst[2] = {1, 2};
1554    wchar_t wdst[2] = {1, 2};
1555    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0));
1556    EXPECT_EQ(1, dst[0]);
1557    EXPECT_EQ(2, dst[1]);
1558    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0));
1559#if defined(WCHAR_T_IS_UNSIGNED)
1560    EXPECT_EQ(1U, wdst[0]);
1561    EXPECT_EQ(2U, wdst[1]);
1562#else
1563    EXPECT_EQ(1, wdst[0]);
1564    EXPECT_EQ(2, wdst[1]);
1565#endif
1566  }
1567
1568  // Test the case were we _just_ competely fit including the null.
1569  {
1570    char dst[8];
1571    wchar_t wdst[8];
1572    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1573    EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1574    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1575    EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1576  }
1577
1578  // Test the case were we we are one smaller, so we can't fit the null.
1579  {
1580    char dst[7];
1581    wchar_t wdst[7];
1582    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1583    EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
1584    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1585    EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
1586  }
1587
1588  // Test the case were we are just too small.
1589  {
1590    char dst[3];
1591    wchar_t wdst[3];
1592    EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1593    EXPECT_EQ(0, memcmp(dst, "ab", 3));
1594    EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1595    EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
1596  }
1597}
1598
1599TEST(StringUtilTest, WprintfFormatPortabilityTest) {
1600  struct TestData {
1601    const wchar_t* input;
1602    bool portable;
1603  } cases[] = {
1604    { L"%ls", true },
1605    { L"%s", false },
1606    { L"%S", false },
1607    { L"%lS", false },
1608    { L"Hello, %s", false },
1609    { L"%lc", true },
1610    { L"%c", false },
1611    { L"%C", false },
1612    { L"%lC", false },
1613    { L"%ls %s", false },
1614    { L"%s %ls", false },
1615    { L"%s %ls %s", false },
1616    { L"%f", true },
1617    { L"%f %F", false },
1618    { L"%d %D", false },
1619    { L"%o %O", false },
1620    { L"%u %U", false },
1621    { L"%f %d %o %u", true },
1622    { L"%-8d (%02.1f%)", true },
1623    { L"% 10s", false },
1624    { L"% 10ls", true }
1625  };
1626  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
1627    EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input));
1628  }
1629}
1630
1631TEST(StringUtilTest, ElideString) {
1632  struct TestData {
1633    const wchar_t* input;
1634    int max_len;
1635    bool result;
1636    const wchar_t* output;
1637  } cases[] = {
1638    { L"Hello", 0, true, L"" },
1639    { L"", 0, false, L"" },
1640    { L"Hello, my name is Tom", 1, true, L"H" },
1641    { L"Hello, my name is Tom", 2, true, L"He" },
1642    { L"Hello, my name is Tom", 3, true, L"H.m" },
1643    { L"Hello, my name is Tom", 4, true, L"H..m" },
1644    { L"Hello, my name is Tom", 5, true, L"H...m" },
1645    { L"Hello, my name is Tom", 6, true, L"He...m" },
1646    { L"Hello, my name is Tom", 7, true, L"He...om" },
1647    { L"Hello, my name is Tom", 10, true, L"Hell...Tom" },
1648    { L"Hello, my name is Tom", 100, false, L"Hello, my name is Tom" }
1649  };
1650  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
1651    std::wstring output;
1652    EXPECT_EQ(cases[i].result,
1653              ElideString(cases[i].input, cases[i].max_len, &output));
1654    EXPECT_TRUE(output == cases[i].output);
1655  }
1656}
1657
1658TEST(StringUtilTest, HexEncode) {
1659  std::string hex(HexEncode(NULL, 0));
1660  EXPECT_EQ(hex.length(), 0U);
1661  unsigned char bytes[] = {0x01, 0xff, 0x02, 0xfe, 0x03, 0x80, 0x81};
1662  hex = HexEncode(bytes, sizeof(bytes));
1663  EXPECT_EQ(hex.compare("01FF02FE038081"), 0);
1664}
1665
1666TEST(StringUtilTest, RemoveChars) {
1667  const char* kRemoveChars = "-/+*";
1668  std::string input = "A-+bc/d!*";
1669  EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));
1670  EXPECT_EQ("Abcd!", input);
1671
1672  // No characters match kRemoveChars.
1673  EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1674  EXPECT_EQ("Abcd!", input);
1675
1676  // Empty string.
1677  input.clear();
1678  EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1679  EXPECT_EQ(std::string(), input);
1680}
1681
1682TEST(StringUtilTest, ContainsOnlyChars) {
1683  // Providing an empty list of characters should return false but for the empty
1684  // string.
1685  EXPECT_TRUE(ContainsOnlyChars("", ""));
1686  EXPECT_FALSE(ContainsOnlyChars("Hello", ""));
1687
1688  EXPECT_TRUE(ContainsOnlyChars("", "1234"));
1689  EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
1690  EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
1691  EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
1692  EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
1693}
1694
1695TEST(SplitStringUsingSubstrTest, EmptyString) {
1696  std::vector<std::string> results;
1697  SplitStringUsingSubstr("", "DELIMITER", &results);
1698  ASSERT_EQ(1u, results.size());
1699  EXPECT_THAT(results, ElementsAre(""));
1700}
1701
1702TEST(SplitStringUsingSubstrTest, StringWithNoDelimiter) {
1703  std::vector<std::string> results;
1704  SplitStringUsingSubstr("alongwordwithnodelimiter", "DELIMITER", &results);
1705  ASSERT_EQ(1u, results.size());
1706  EXPECT_THAT(results, ElementsAre("alongwordwithnodelimiter"));
1707}
1708
1709TEST(SplitStringUsingSubstrTest, LeadingDelimitersSkipped) {
1710  std::vector<std::string> results;
1711  SplitStringUsingSubstr(
1712      "DELIMITERDELIMITERDELIMITERoneDELIMITERtwoDELIMITERthree",
1713      "DELIMITER",
1714      &results);
1715  ASSERT_EQ(6u, results.size());
1716  EXPECT_THAT(results, ElementsAre("", "", "", "one", "two", "three"));
1717}
1718
1719TEST(SplitStringUsingSubstrTest, ConsecutiveDelimitersSkipped) {
1720  std::vector<std::string> results;
1721  SplitStringUsingSubstr(
1722      "unoDELIMITERDELIMITERDELIMITERdosDELIMITERtresDELIMITERDELIMITERcuatro",
1723      "DELIMITER",
1724      &results);
1725  ASSERT_EQ(7u, results.size());
1726  EXPECT_THAT(results, ElementsAre("uno", "", "", "dos", "tres", "", "cuatro"));
1727}
1728
1729TEST(SplitStringUsingSubstrTest, TrailingDelimitersSkipped) {
1730  std::vector<std::string> results;
1731  SplitStringUsingSubstr(
1732      "unDELIMITERdeuxDELIMITERtroisDELIMITERquatreDELIMITERDELIMITERDELIMITER",
1733      "DELIMITER",
1734      &results);
1735  ASSERT_EQ(7u, results.size());
1736  EXPECT_THAT(
1737      results, ElementsAre("un", "deux", "trois", "quatre", "", "", ""));
1738}
1739
1740}  // namespace base
1741