string_util_unittest.cc revision 3345a6884c488ff3a535c2c9acdd33d74b37e311
1// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include <math.h> 6#include <stdarg.h> 7 8#include <limits> 9#include <sstream> 10 11#include "base/basictypes.h" 12#include "base/string_util.h" 13#include "base/utf_string_conversions.h" 14#include "testing/gmock/include/gmock/gmock.h" 15#include "testing/gtest/include/gtest/gtest.h" 16 17using ::testing::ElementsAre; 18 19namespace base { 20 21namespace { 22 23// Given a null-terminated string of wchar_t with each wchar_t representing 24// a UTF-16 code unit, returns a string16 made up of wchar_t's in the input. 25// Each wchar_t should be <= 0xFFFF and a non-BMP character (> U+FFFF) 26// should be represented as a surrogate pair (two UTF-16 units) 27// *even* where wchar_t is 32-bit (Linux and Mac). 28// 29// This is to help write tests for functions with string16 params until 30// the C++ 0x UTF-16 literal is well-supported by compilers. 31string16 BuildString16(const wchar_t* s) { 32#if defined(WCHAR_T_IS_UTF16) 33 return string16(s); 34#elif defined(WCHAR_T_IS_UTF32) 35 string16 u16; 36 while (*s != 0) { 37 DCHECK_LE(static_cast<unsigned int>(*s), 0xFFFFu); 38 u16.push_back(*s++); 39 } 40 return u16; 41#endif 42} 43 44} // namespace 45 46static const struct trim_case { 47 const wchar_t* input; 48 const TrimPositions positions; 49 const wchar_t* output; 50 const TrimPositions return_value; 51} trim_cases[] = { 52 {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING}, 53 {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING}, 54 {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL}, 55 {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE}, 56 {L"", TRIM_ALL, L"", TRIM_NONE}, 57 {L" ", TRIM_LEADING, L"", TRIM_LEADING}, 58 {L" ", TRIM_TRAILING, L"", TRIM_TRAILING}, 59 {L" ", TRIM_ALL, L"", TRIM_ALL}, 60 {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL}, 61 {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL}, 62}; 63 64static const struct trim_case_ascii { 65 const char* input; 66 const TrimPositions positions; 67 const char* output; 68 const TrimPositions return_value; 69} trim_cases_ascii[] = { 70 {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING}, 71 {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING}, 72 {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL}, 73 {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE}, 74 {"", TRIM_ALL, "", TRIM_NONE}, 75 {" ", TRIM_LEADING, "", TRIM_LEADING}, 76 {" ", TRIM_TRAILING, "", TRIM_TRAILING}, 77 {" ", TRIM_ALL, "", TRIM_ALL}, 78 {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL}, 79}; 80 81namespace { 82 83// Helper used to test TruncateUTF8ToByteSize. 84bool Truncated(const std::string& input, const size_t byte_size, 85 std::string* output) { 86 size_t prev = input.length(); 87 TruncateUTF8ToByteSize(input, byte_size, output); 88 return prev != output->length(); 89} 90 91} // namespace 92 93TEST(StringUtilTest, TruncateUTF8ToByteSize) { 94 std::string output; 95 96 // Empty strings and invalid byte_size arguments 97 EXPECT_FALSE(Truncated("", 0, &output)); 98 EXPECT_EQ(output, ""); 99 EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output)); 100 EXPECT_EQ(output, ""); 101 EXPECT_FALSE(Truncated("\xe1\x80\xbf", -1, &output)); 102 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output)); 103 104 // Testing the truncation of valid UTF8 correctly 105 EXPECT_TRUE(Truncated("abc", 2, &output)); 106 EXPECT_EQ(output, "ab"); 107 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output)); 108 EXPECT_EQ(output.compare("\xc2\x81"), 0); 109 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output)); 110 EXPECT_EQ(output.compare("\xc2\x81"), 0); 111 EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output)); 112 EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0); 113 114 { 115 const char array[] = "\x00\x00\xc2\x81\xc2\x81"; 116 const std::string array_string(array, arraysize(array)); 117 EXPECT_TRUE(Truncated(array_string, 4, &output)); 118 EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0); 119 } 120 121 { 122 const char array[] = "\x00\xc2\x81\xc2\x81"; 123 const std::string array_string(array, arraysize(array)); 124 EXPECT_TRUE(Truncated(array_string, 4, &output)); 125 EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0); 126 } 127 128 // Testing invalid UTF8 129 EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output)); 130 EXPECT_EQ(output.compare(""), 0); 131 EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output)); 132 EXPECT_EQ(output.compare(""), 0); 133 EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output)); 134 EXPECT_EQ(output.compare(""), 0); 135 136 // Testing invalid UTF8 mixed with valid UTF8 137 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output)); 138 EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0); 139 EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output)); 140 EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0); 141 EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf", 142 10, &output)); 143 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0); 144 EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0", 145 10, &output)); 146 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0); 147 EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output)); 148 EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0); 149 150 // Overlong sequences 151 EXPECT_TRUE(Truncated("\xc0\x80", 2, &output)); 152 EXPECT_EQ(output.compare(""), 0); 153 EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output)); 154 EXPECT_EQ(output.compare(""), 0); 155 EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output)); 156 EXPECT_EQ(output.compare(""), 0); 157 EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output)); 158 EXPECT_EQ(output.compare(""), 0); 159 EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output)); 160 EXPECT_EQ(output.compare(""), 0); 161 EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output)); 162 EXPECT_EQ(output.compare(""), 0); 163 EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output)); 164 EXPECT_EQ(output.compare(""), 0); 165 EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output)); 166 EXPECT_EQ(output.compare(""), 0); 167 EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output)); 168 EXPECT_EQ(output.compare(""), 0); 169 EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output)); 170 EXPECT_EQ(output.compare(""), 0); 171 EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output)); 172 EXPECT_EQ(output.compare(""), 0); 173 174 // Beyond U+10FFFF (the upper limit of Unicode codespace) 175 EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output)); 176 EXPECT_EQ(output.compare(""), 0); 177 EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output)); 178 EXPECT_EQ(output.compare(""), 0); 179 EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output)); 180 EXPECT_EQ(output.compare(""), 0); 181 182 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE) 183 EXPECT_TRUE(Truncated("\xfe\xff", 2, &output)); 184 EXPECT_EQ(output.compare(""), 0); 185 EXPECT_TRUE(Truncated("\xff\xfe", 2, &output)); 186 EXPECT_EQ(output.compare(""), 0); 187 188 { 189 const char array[] = "\x00\x00\xfe\xff"; 190 const std::string array_string(array, arraysize(array)); 191 EXPECT_TRUE(Truncated(array_string, 4, &output)); 192 EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0); 193 } 194 195 // Variants on the previous test 196 { 197 const char array[] = "\xff\xfe\x00\x00"; 198 const std::string array_string(array, 4); 199 EXPECT_FALSE(Truncated(array_string, 4, &output)); 200 EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0); 201 } 202 { 203 const char array[] = "\xff\x00\x00\xfe"; 204 const std::string array_string(array, arraysize(array)); 205 EXPECT_TRUE(Truncated(array_string, 4, &output)); 206 EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0); 207 } 208 209 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF> 210 EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output)); 211 EXPECT_EQ(output.compare(""), 0); 212 EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output)); 213 EXPECT_EQ(output.compare(""), 0); 214 EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output)); 215 EXPECT_EQ(output.compare(""), 0); 216 EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output)); 217 EXPECT_EQ(output.compare(""), 0); 218 EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output)); 219 EXPECT_EQ(output.compare(""), 0); 220 221 // Strings in legacy encodings that are valid in UTF-8, but 222 // are invalid as UTF-8 in real data. 223 EXPECT_TRUE(Truncated("caf\xe9", 4, &output)); 224 EXPECT_EQ(output.compare("caf"), 0); 225 EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output)); 226 EXPECT_EQ(output.compare(""), 0); 227 EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output)); 228 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0); 229 EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7, 230 &output)); 231 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0); 232 233 // Testing using the same string as input and output. 234 EXPECT_FALSE(Truncated(output, 4, &output)); 235 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0); 236 EXPECT_TRUE(Truncated(output, 3, &output)); 237 EXPECT_EQ(output.compare("\xa7\x41"), 0); 238 239 // "abc" with U+201[CD] in windows-125[0-8] 240 EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output)); 241 EXPECT_EQ(output.compare("\x93" "abc"), 0); 242 243 // U+0639 U+064E U+0644 U+064E in ISO-8859-6 244 EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output)); 245 EXPECT_EQ(output.compare(""), 0); 246 247 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7 248 EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output)); 249 EXPECT_EQ(output.compare(""), 0); 250} 251 252TEST(StringUtilTest, TrimWhitespace) { 253 std::wstring output; // Allow contents to carry over to next testcase 254 for (size_t i = 0; i < arraysize(trim_cases); ++i) { 255 const trim_case& value = trim_cases[i]; 256 EXPECT_EQ(value.return_value, 257 TrimWhitespace(value.input, value.positions, &output)); 258 EXPECT_EQ(value.output, output); 259 } 260 261 // Test that TrimWhitespace() can take the same string for input and output 262 output = L" This is a test \r\n"; 263 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output)); 264 EXPECT_EQ(L"This is a test", output); 265 266 // Once more, but with a string of whitespace 267 output = L" \r\n"; 268 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output)); 269 EXPECT_EQ(L"", output); 270 271 std::string output_ascii; 272 for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) { 273 const trim_case_ascii& value = trim_cases_ascii[i]; 274 EXPECT_EQ(value.return_value, 275 TrimWhitespace(value.input, value.positions, &output_ascii)); 276 EXPECT_EQ(value.output, output_ascii); 277 } 278} 279 280static const struct collapse_case { 281 const wchar_t* input; 282 const bool trim; 283 const wchar_t* output; 284} collapse_cases[] = { 285 {L" Google Video ", false, L"Google Video"}, 286 {L"Google Video", false, L"Google Video"}, 287 {L"", false, L""}, 288 {L" ", false, L""}, 289 {L"\t\rTest String\n", false, L"Test String"}, 290 {L"\x2002Test String\x00A0\x3000", false, L"Test String"}, 291 {L" Test \n \t String ", false, L"Test String"}, 292 {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"}, 293 {L" Test String", false, L"Test String"}, 294 {L"Test String ", false, L"Test String"}, 295 {L"Test String", false, L"Test String"}, 296 {L"", true, L""}, 297 {L"\n", true, L""}, 298 {L" \r ", true, L""}, 299 {L"\nFoo", true, L"Foo"}, 300 {L"\r Foo ", true, L"Foo"}, 301 {L" Foo bar ", true, L"Foo bar"}, 302 {L" \tFoo bar \n", true, L"Foo bar"}, 303 {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"}, 304}; 305 306TEST(StringUtilTest, CollapseWhitespace) { 307 for (size_t i = 0; i < arraysize(collapse_cases); ++i) { 308 const collapse_case& value = collapse_cases[i]; 309 EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim)); 310 } 311} 312 313static const struct collapse_case_ascii { 314 const char* input; 315 const bool trim; 316 const char* output; 317} collapse_cases_ascii[] = { 318 {" Google Video ", false, "Google Video"}, 319 {"Google Video", false, "Google Video"}, 320 {"", false, ""}, 321 {" ", false, ""}, 322 {"\t\rTest String\n", false, "Test String"}, 323 {" Test \n \t String ", false, "Test String"}, 324 {" Test String", false, "Test String"}, 325 {"Test String ", false, "Test String"}, 326 {"Test String", false, "Test String"}, 327 {"", true, ""}, 328 {"\n", true, ""}, 329 {" \r ", true, ""}, 330 {"\nFoo", true, "Foo"}, 331 {"\r Foo ", true, "Foo"}, 332 {" Foo bar ", true, "Foo bar"}, 333 {" \tFoo bar \n", true, "Foo bar"}, 334 {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"}, 335}; 336 337TEST(StringUtilTest, CollapseWhitespaceASCII) { 338 for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) { 339 const collapse_case_ascii& value = collapse_cases_ascii[i]; 340 EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim)); 341 } 342} 343 344TEST(StringUtilTest, ContainsOnlyWhitespaceASCII) { 345 EXPECT_TRUE(ContainsOnlyWhitespaceASCII("")); 346 EXPECT_TRUE(ContainsOnlyWhitespaceASCII(" ")); 347 EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t")); 348 EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t \r \n ")); 349 EXPECT_FALSE(ContainsOnlyWhitespaceASCII("a")); 350 EXPECT_FALSE(ContainsOnlyWhitespaceASCII("\thello\r \n ")); 351} 352 353TEST(StringUtilTest, ContainsOnlyWhitespace) { 354 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(""))); 355 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(" "))); 356 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t"))); 357 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t \r \n "))); 358 EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("a"))); 359 EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("\thello\r \n "))); 360} 361 362TEST(StringUtilTest, IsStringUTF8) { 363 EXPECT_TRUE(IsStringUTF8("abc")); 364 EXPECT_TRUE(IsStringUTF8("\xc2\x81")); 365 EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf")); 366 EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf")); 367 EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf")); 368 EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM 369 370 // surrogate code points 371 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf")); 372 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f")); 373 EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf")); 374 375 // overlong sequences 376 EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000 377 EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB" 378 EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000 379 EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080 380 EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff 381 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D 382 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091 383 EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800 384 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM) 385 EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F 386 EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5 387 388 // Beyond U+10FFFF (the upper limit of Unicode codespace) 389 EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000 390 EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes 391 EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes 392 393 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE) 394 EXPECT_FALSE(IsStringUTF8("\xfe\xff")); 395 EXPECT_FALSE(IsStringUTF8("\xff\xfe")); 396 EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4))); 397 EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00")); 398 399 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF> 400 EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE) 401 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // U+1FFFE 402 EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+10FFFF 403 EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0 404 EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF 405 // Strings in legacy encodings. We can certainly make up strings 406 // in a legacy encoding that are valid in UTF-8, but in real data, 407 // most of them are invalid as UTF-8. 408 EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1 409 EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC001 in EUC-KR 410 EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5 411 // "abc" with U+201[CD] in windows-125[0-8] 412 EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94")); 413 // U+0639 U+064E U+0644 U+064E in ISO-8859-6 414 EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee")); 415 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7 416 EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC")); 417 418 // Check that we support Embedded Nulls. The first uses the canonical UTF-8 419 // representation, and the second uses a 2-byte sequence. The second version 420 // is invalid UTF-8 since UTF-8 states that the shortest encoding for a 421 // given codepoint must be used. 422 static const char kEmbeddedNull[] = "embedded\0null"; 423 EXPECT_TRUE(IsStringUTF8( 424 std::string(kEmbeddedNull, sizeof(kEmbeddedNull)))); 425 EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000")); 426} 427 428TEST(StringUtilTest, ConvertASCII) { 429 static const char* char_cases[] = { 430 "Google Video", 431 "Hello, world\n", 432 "0123ABCDwxyz \a\b\t\r\n!+,.~" 433 }; 434 435 static const wchar_t* const wchar_cases[] = { 436 L"Google Video", 437 L"Hello, world\n", 438 L"0123ABCDwxyz \a\b\t\r\n!+,.~" 439 }; 440 441 for (size_t i = 0; i < arraysize(char_cases); ++i) { 442 EXPECT_TRUE(IsStringASCII(char_cases[i])); 443 std::wstring wide = ASCIIToWide(char_cases[i]); 444 EXPECT_EQ(wchar_cases[i], wide); 445 446 EXPECT_TRUE(IsStringASCII(wchar_cases[i])); 447 std::string ascii = WideToASCII(wchar_cases[i]); 448 EXPECT_EQ(char_cases[i], ascii); 449 } 450 451 EXPECT_FALSE(IsStringASCII("Google \x80Video")); 452 EXPECT_FALSE(IsStringASCII(L"Google \x80Video")); 453 454 // Convert empty strings. 455 std::wstring wempty; 456 std::string empty; 457 EXPECT_EQ(empty, WideToASCII(wempty)); 458 EXPECT_EQ(wempty, ASCIIToWide(empty)); 459 460 // Convert strings with an embedded NUL character. 461 const char chars_with_nul[] = "test\0string"; 462 const int length_with_nul = arraysize(chars_with_nul) - 1; 463 std::string string_with_nul(chars_with_nul, length_with_nul); 464 std::wstring wide_with_nul = ASCIIToWide(string_with_nul); 465 EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul), 466 wide_with_nul.length()); 467 std::string narrow_with_nul = WideToASCII(wide_with_nul); 468 EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul), 469 narrow_with_nul.length()); 470 EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul)); 471} 472 473TEST(StringUtilTest, ToUpperASCII) { 474 EXPECT_EQ('C', ToUpperASCII('C')); 475 EXPECT_EQ('C', ToUpperASCII('c')); 476 EXPECT_EQ('2', ToUpperASCII('2')); 477 478 EXPECT_EQ(L'C', ToUpperASCII(L'C')); 479 EXPECT_EQ(L'C', ToUpperASCII(L'c')); 480 EXPECT_EQ(L'2', ToUpperASCII(L'2')); 481 482 std::string in_place_a("Cc2"); 483 StringToUpperASCII(&in_place_a); 484 EXPECT_EQ("CC2", in_place_a); 485 486 std::wstring in_place_w(L"Cc2"); 487 StringToUpperASCII(&in_place_w); 488 EXPECT_EQ(L"CC2", in_place_w); 489 490 std::string original_a("Cc2"); 491 std::string upper_a = StringToUpperASCII(original_a); 492 EXPECT_EQ("CC2", upper_a); 493 494 std::wstring original_w(L"Cc2"); 495 std::wstring upper_w = StringToUpperASCII(original_w); 496 EXPECT_EQ(L"CC2", upper_w); 497} 498 499static const struct { 500 const wchar_t* src_w; 501 const char* src_a; 502 const char* dst; 503} lowercase_cases[] = { 504 {L"FoO", "FoO", "foo"}, 505 {L"foo", "foo", "foo"}, 506 {L"FOO", "FOO", "foo"}, 507}; 508 509TEST(StringUtilTest, LowerCaseEqualsASCII) { 510 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) { 511 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w, 512 lowercase_cases[i].dst)); 513 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a, 514 lowercase_cases[i].dst)); 515 } 516} 517 518TEST(StringUtilTest, GetByteDisplayUnits) { 519 static const struct { 520 int64 bytes; 521 DataUnits expected; 522 } cases[] = { 523 {0, DATA_UNITS_BYTE}, 524 {512, DATA_UNITS_BYTE}, 525 {10*1024, DATA_UNITS_KIBIBYTE}, 526 {10*1024*1024, DATA_UNITS_MEBIBYTE}, 527 {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE}, 528 {~(1LL<<63), DATA_UNITS_GIBIBYTE}, 529#ifdef NDEBUG 530 {-1, DATA_UNITS_BYTE}, 531#endif 532 }; 533 534 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) 535 EXPECT_EQ(cases[i].expected, GetByteDisplayUnits(cases[i].bytes)); 536} 537 538TEST(StringUtilTest, FormatBytes) { 539 static const struct { 540 int64 bytes; 541 DataUnits units; 542 const char* expected; 543 const char* expected_with_units; 544 } cases[] = { 545 // Expected behavior: we show one post-decimal digit when we have 546 // under two pre-decimal digits, except in cases where it makes no 547 // sense (zero or bytes). 548 // Since we switch units once we cross the 1000 mark, this keeps 549 // the display of file sizes or bytes consistently around three 550 // digits. 551 {0, DATA_UNITS_BYTE, "0", "0 B"}, 552 {512, DATA_UNITS_BYTE, "512", "512 B"}, 553 {512, DATA_UNITS_KIBIBYTE, "0.5", "0.5 kB"}, 554 {1024*1024, DATA_UNITS_KIBIBYTE, "1024", "1024 kB"}, 555 {1024*1024, DATA_UNITS_MEBIBYTE, "1.0", "1.0 MB"}, 556 {1024*1024*1024, DATA_UNITS_GIBIBYTE, "1.0", "1.0 GB"}, 557 {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "10.0", "10.0 GB"}, 558 {99LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "99.0", "99.0 GB"}, 559 {105LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "105", "105 GB"}, 560 {105LL*1024*1024*1024 + 500LL*1024*1024, DATA_UNITS_GIBIBYTE, 561 "105", "105 GB"}, 562 {~(1LL<<63), DATA_UNITS_GIBIBYTE, "8589934592", "8589934592 GB"}, 563 564 {99*1024 + 103, DATA_UNITS_KIBIBYTE, "99.1", "99.1 kB"}, 565 {1024*1024 + 103, DATA_UNITS_KIBIBYTE, "1024", "1024 kB"}, 566 {1024*1024 + 205 * 1024, DATA_UNITS_MEBIBYTE, "1.2", "1.2 MB"}, 567 {1024*1024*1024 + (927 * 1024*1024), DATA_UNITS_GIBIBYTE, 568 "1.9", "1.9 GB"}, 569 {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "10.0", "10.0 GB"}, 570 {100LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "100", "100 GB"}, 571#ifdef NDEBUG 572 {-1, DATA_UNITS_BYTE, "", ""}, 573#endif 574 }; 575 576 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { 577 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), 578 FormatBytes(cases[i].bytes, cases[i].units, false)); 579 EXPECT_EQ(ASCIIToUTF16(cases[i].expected_with_units), 580 FormatBytes(cases[i].bytes, cases[i].units, true)); 581 } 582} 583 584TEST(StringUtilTest, ReplaceSubstringsAfterOffset) { 585 static const struct { 586 const char* str; 587 string16::size_type start_offset; 588 const char* find_this; 589 const char* replace_with; 590 const char* expected; 591 } cases[] = { 592 {"aaa", 0, "a", "b", "bbb"}, 593 {"abb", 0, "ab", "a", "ab"}, 594 {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "}, 595 {"Not found", 0, "x", "0", "Not found"}, 596 {"Not found again", 5, "x", "0", "Not found again"}, 597 {" Making it much longer ", 0, " ", "Four score and seven years ago", 598 "Four score and seven years agoMakingFour score and seven years agoit" 599 "Four score and seven years agomuchFour score and seven years agolonger" 600 "Four score and seven years ago"}, 601 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"}, 602 {"Replace me only me once", 9, "me ", "", "Replace me only once"}, 603 {"abababab", 2, "ab", "c", "abccc"}, 604 }; 605 606 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) { 607 string16 str = ASCIIToUTF16(cases[i].str); 608 ReplaceSubstringsAfterOffset(&str, cases[i].start_offset, 609 ASCIIToUTF16(cases[i].find_this), 610 ASCIIToUTF16(cases[i].replace_with)); 611 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str); 612 } 613} 614 615TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) { 616 static const struct { 617 const char* str; 618 string16::size_type start_offset; 619 const char* find_this; 620 const char* replace_with; 621 const char* expected; 622 } cases[] = { 623 {"aaa", 0, "a", "b", "baa"}, 624 {"abb", 0, "ab", "a", "ab"}, 625 {"Removing some substrings inging", 0, "ing", "", 626 "Remov some substrings inging"}, 627 {"Not found", 0, "x", "0", "Not found"}, 628 {"Not found again", 5, "x", "0", "Not found again"}, 629 {" Making it much longer ", 0, " ", "Four score and seven years ago", 630 "Four score and seven years agoMaking it much longer "}, 631 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"}, 632 {"Replace me only me once", 4, "me ", "", "Replace only me once"}, 633 {"abababab", 2, "ab", "c", "abcabab"}, 634 }; 635 636 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) { 637 string16 str = ASCIIToUTF16(cases[i].str); 638 ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset, 639 ASCIIToUTF16(cases[i].find_this), 640 ASCIIToUTF16(cases[i].replace_with)); 641 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str); 642 } 643} 644 645TEST(StringUtilTest, HexDigitToInt) { 646 EXPECT_EQ(0, HexDigitToInt('0')); 647 EXPECT_EQ(1, HexDigitToInt('1')); 648 EXPECT_EQ(2, HexDigitToInt('2')); 649 EXPECT_EQ(3, HexDigitToInt('3')); 650 EXPECT_EQ(4, HexDigitToInt('4')); 651 EXPECT_EQ(5, HexDigitToInt('5')); 652 EXPECT_EQ(6, HexDigitToInt('6')); 653 EXPECT_EQ(7, HexDigitToInt('7')); 654 EXPECT_EQ(8, HexDigitToInt('8')); 655 EXPECT_EQ(9, HexDigitToInt('9')); 656 EXPECT_EQ(10, HexDigitToInt('A')); 657 EXPECT_EQ(11, HexDigitToInt('B')); 658 EXPECT_EQ(12, HexDigitToInt('C')); 659 EXPECT_EQ(13, HexDigitToInt('D')); 660 EXPECT_EQ(14, HexDigitToInt('E')); 661 EXPECT_EQ(15, HexDigitToInt('F')); 662 663 // Verify the lower case as well. 664 EXPECT_EQ(10, HexDigitToInt('a')); 665 EXPECT_EQ(11, HexDigitToInt('b')); 666 EXPECT_EQ(12, HexDigitToInt('c')); 667 EXPECT_EQ(13, HexDigitToInt('d')); 668 EXPECT_EQ(14, HexDigitToInt('e')); 669 EXPECT_EQ(15, HexDigitToInt('f')); 670} 671 672// This checks where we can use the assignment operator for a va_list. We need 673// a way to do this since Visual C doesn't support va_copy, but assignment on 674// va_list is not guaranteed to be a copy. See StringAppendVT which uses this 675// capability. 676static void VariableArgsFunc(const char* format, ...) { 677 va_list org; 678 va_start(org, format); 679 680 va_list dup; 681 GG_VA_COPY(dup, org); 682 int i1 = va_arg(org, int); 683 int j1 = va_arg(org, int); 684 char* s1 = va_arg(org, char*); 685 double d1 = va_arg(org, double); 686 va_end(org); 687 688 int i2 = va_arg(dup, int); 689 int j2 = va_arg(dup, int); 690 char* s2 = va_arg(dup, char*); 691 double d2 = va_arg(dup, double); 692 693 EXPECT_EQ(i1, i2); 694 EXPECT_EQ(j1, j2); 695 EXPECT_STREQ(s1, s2); 696 EXPECT_EQ(d1, d2); 697 698 va_end(dup); 699} 700 701TEST(StringUtilTest, VAList) { 702 VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21); 703} 704 705// Test for Tokenize 706template <typename STR> 707void TokenizeTest() { 708 std::vector<STR> r; 709 size_t size; 710 711 size = Tokenize(STR("This is a string"), STR(" "), &r); 712 EXPECT_EQ(4U, size); 713 ASSERT_EQ(4U, r.size()); 714 EXPECT_EQ(r[0], STR("This")); 715 EXPECT_EQ(r[1], STR("is")); 716 EXPECT_EQ(r[2], STR("a")); 717 EXPECT_EQ(r[3], STR("string")); 718 r.clear(); 719 720 size = Tokenize(STR("one,two,three"), STR(","), &r); 721 EXPECT_EQ(3U, size); 722 ASSERT_EQ(3U, r.size()); 723 EXPECT_EQ(r[0], STR("one")); 724 EXPECT_EQ(r[1], STR("two")); 725 EXPECT_EQ(r[2], STR("three")); 726 r.clear(); 727 728 size = Tokenize(STR("one,two:three;four"), STR(",:"), &r); 729 EXPECT_EQ(3U, size); 730 ASSERT_EQ(3U, r.size()); 731 EXPECT_EQ(r[0], STR("one")); 732 EXPECT_EQ(r[1], STR("two")); 733 EXPECT_EQ(r[2], STR("three;four")); 734 r.clear(); 735 736 size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r); 737 EXPECT_EQ(4U, size); 738 ASSERT_EQ(4U, r.size()); 739 EXPECT_EQ(r[0], STR("one")); 740 EXPECT_EQ(r[1], STR("two")); 741 EXPECT_EQ(r[2], STR("three")); 742 EXPECT_EQ(r[3], STR("four")); 743 r.clear(); 744 745 size = Tokenize(STR("one, two, three"), STR(","), &r); 746 EXPECT_EQ(3U, size); 747 ASSERT_EQ(3U, r.size()); 748 EXPECT_EQ(r[0], STR("one")); 749 EXPECT_EQ(r[1], STR(" two")); 750 EXPECT_EQ(r[2], STR(" three")); 751 r.clear(); 752 753 size = Tokenize(STR("one, two, three, "), STR(","), &r); 754 EXPECT_EQ(4U, size); 755 ASSERT_EQ(4U, r.size()); 756 EXPECT_EQ(r[0], STR("one")); 757 EXPECT_EQ(r[1], STR(" two")); 758 EXPECT_EQ(r[2], STR(" three")); 759 EXPECT_EQ(r[3], STR(" ")); 760 r.clear(); 761 762 size = Tokenize(STR("one, two, three,"), STR(","), &r); 763 EXPECT_EQ(3U, size); 764 ASSERT_EQ(3U, r.size()); 765 EXPECT_EQ(r[0], STR("one")); 766 EXPECT_EQ(r[1], STR(" two")); 767 EXPECT_EQ(r[2], STR(" three")); 768 r.clear(); 769 770 size = Tokenize(STR(""), STR(","), &r); 771 EXPECT_EQ(0U, size); 772 ASSERT_EQ(0U, r.size()); 773 r.clear(); 774 775 size = Tokenize(STR(","), STR(","), &r); 776 EXPECT_EQ(0U, size); 777 ASSERT_EQ(0U, r.size()); 778 r.clear(); 779 780 size = Tokenize(STR(",;:."), STR(".:;,"), &r); 781 EXPECT_EQ(0U, size); 782 ASSERT_EQ(0U, r.size()); 783 r.clear(); 784 785 size = Tokenize(STR("\t\ta\t"), STR("\t"), &r); 786 EXPECT_EQ(1U, size); 787 ASSERT_EQ(1U, r.size()); 788 EXPECT_EQ(r[0], STR("a")); 789 r.clear(); 790 791 size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r); 792 EXPECT_EQ(2U, size); 793 ASSERT_EQ(2U, r.size()); 794 EXPECT_EQ(r[0], STR("\ta\t")); 795 EXPECT_EQ(r[1], STR("b\tcc")); 796 r.clear(); 797} 798 799TEST(StringUtilTest, TokenizeStdString) { 800 TokenizeTest<std::string>(); 801} 802 803TEST(StringUtilTest, TokenizeStringPiece) { 804 TokenizeTest<base::StringPiece>(); 805} 806 807// Test for JoinString 808TEST(StringUtilTest, JoinString) { 809 std::vector<std::string> in; 810 EXPECT_EQ("", JoinString(in, ',')); 811 812 in.push_back("a"); 813 EXPECT_EQ("a", JoinString(in, ',')); 814 815 in.push_back("b"); 816 in.push_back("c"); 817 EXPECT_EQ("a,b,c", JoinString(in, ',')); 818 819 in.push_back(""); 820 EXPECT_EQ("a,b,c,", JoinString(in, ',')); 821 in.push_back(" "); 822 EXPECT_EQ("a|b|c|| ", JoinString(in, '|')); 823} 824 825TEST(StringUtilTest, StartsWith) { 826 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true)); 827 EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true)); 828 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false)); 829 EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false)); 830 EXPECT_FALSE(StartsWithASCII("java", "javascript", true)); 831 EXPECT_FALSE(StartsWithASCII("java", "javascript", false)); 832 EXPECT_FALSE(StartsWithASCII("", "javascript", false)); 833 EXPECT_FALSE(StartsWithASCII("", "javascript", true)); 834 EXPECT_TRUE(StartsWithASCII("java", "", false)); 835 EXPECT_TRUE(StartsWithASCII("java", "", true)); 836 837 EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", true)); 838 EXPECT_FALSE(StartsWith(L"JavaScript:url", L"javascript", true)); 839 EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", false)); 840 EXPECT_TRUE(StartsWith(L"JavaScript:url", L"javascript", false)); 841 EXPECT_FALSE(StartsWith(L"java", L"javascript", true)); 842 EXPECT_FALSE(StartsWith(L"java", L"javascript", false)); 843 EXPECT_FALSE(StartsWith(L"", L"javascript", false)); 844 EXPECT_FALSE(StartsWith(L"", L"javascript", true)); 845 EXPECT_TRUE(StartsWith(L"java", L"", false)); 846 EXPECT_TRUE(StartsWith(L"java", L"", true)); 847} 848 849TEST(StringUtilTest, EndsWith) { 850 EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", true)); 851 EXPECT_FALSE(EndsWith(L"Foo.Plugin", L".plugin", true)); 852 EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", false)); 853 EXPECT_TRUE(EndsWith(L"Foo.Plugin", L".plugin", false)); 854 EXPECT_FALSE(EndsWith(L".plug", L".plugin", true)); 855 EXPECT_FALSE(EndsWith(L".plug", L".plugin", false)); 856 EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", true)); 857 EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", false)); 858 EXPECT_FALSE(EndsWith(L"", L".plugin", false)); 859 EXPECT_FALSE(EndsWith(L"", L".plugin", true)); 860 EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", false)); 861 EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", true)); 862 EXPECT_TRUE(EndsWith(L".plugin", L".plugin", false)); 863 EXPECT_TRUE(EndsWith(L".plugin", L".plugin", true)); 864 EXPECT_TRUE(EndsWith(L"", L"", false)); 865 EXPECT_TRUE(EndsWith(L"", L"", true)); 866} 867 868TEST(StringUtilTest, GetStringFWithOffsets) { 869 std::vector<string16> subst; 870 subst.push_back(ASCIIToUTF16("1")); 871 subst.push_back(ASCIIToUTF16("2")); 872 std::vector<size_t> offsets; 873 874 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."), 875 subst, 876 &offsets); 877 EXPECT_EQ(2U, offsets.size()); 878 EXPECT_EQ(7U, offsets[0]); 879 EXPECT_EQ(25U, offsets[1]); 880 offsets.clear(); 881 882 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."), 883 subst, 884 &offsets); 885 EXPECT_EQ(2U, offsets.size()); 886 EXPECT_EQ(25U, offsets[0]); 887 EXPECT_EQ(7U, offsets[1]); 888 offsets.clear(); 889} 890 891TEST(StringUtilTest, ReplaceStringPlaceholders) { 892 std::vector<string16> subst; 893 subst.push_back(ASCIIToUTF16("9a")); 894 subst.push_back(ASCIIToUTF16("8b")); 895 subst.push_back(ASCIIToUTF16("7c")); 896 subst.push_back(ASCIIToUTF16("6d")); 897 subst.push_back(ASCIIToUTF16("5e")); 898 subst.push_back(ASCIIToUTF16("4f")); 899 subst.push_back(ASCIIToUTF16("3g")); 900 subst.push_back(ASCIIToUTF16("2h")); 901 subst.push_back(ASCIIToUTF16("1i")); 902 903 string16 formatted = 904 ReplaceStringPlaceholders( 905 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL); 906 907 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii")); 908} 909 910TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) { 911 // Test whether replacestringplaceholders works as expected when there 912 // are fewer inputs than outputs. 913 std::vector<string16> subst; 914 subst.push_back(ASCIIToUTF16("9a")); 915 subst.push_back(ASCIIToUTF16("8b")); 916 subst.push_back(ASCIIToUTF16("7c")); 917 918 string16 formatted = 919 ReplaceStringPlaceholders( 920 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL); 921 922 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci")); 923} 924 925TEST(StringUtilTest, StdStringReplaceStringPlaceholders) { 926 std::vector<std::string> subst; 927 subst.push_back("9a"); 928 subst.push_back("8b"); 929 subst.push_back("7c"); 930 subst.push_back("6d"); 931 subst.push_back("5e"); 932 subst.push_back("4f"); 933 subst.push_back("3g"); 934 subst.push_back("2h"); 935 subst.push_back("1i"); 936 937 std::string formatted = 938 ReplaceStringPlaceholders( 939 "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL); 940 941 EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"); 942} 943 944TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) { 945 std::vector<std::string> subst; 946 subst.push_back("a"); 947 subst.push_back("b"); 948 subst.push_back("c"); 949 EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL), 950 "$1 $$2 $$$3"); 951} 952 953TEST(StringUtilTest, SplitStringAlongWhitespace) { 954 struct TestData { 955 const std::wstring input; 956 const size_t expected_result_count; 957 const std::wstring output1; 958 const std::wstring output2; 959 } data[] = { 960 { L"a", 1, L"a", L"" }, 961 { L" ", 0, L"", L"" }, 962 { L" a", 1, L"a", L"" }, 963 { L" ab ", 1, L"ab", L"" }, 964 { L" ab c", 2, L"ab", L"c" }, 965 { L" ab c ", 2, L"ab", L"c" }, 966 { L" ab cd", 2, L"ab", L"cd" }, 967 { L" ab cd ", 2, L"ab", L"cd" }, 968 { L" \ta\t", 1, L"a", L"" }, 969 { L" b\ta\t", 2, L"b", L"a" }, 970 { L" b\tat", 2, L"b", L"at" }, 971 { L"b\tat", 2, L"b", L"at" }, 972 { L"b\t at", 2, L"b", L"at" }, 973 }; 974 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) { 975 std::vector<std::wstring> results; 976 SplitStringAlongWhitespace(data[i].input, &results); 977 ASSERT_EQ(data[i].expected_result_count, results.size()); 978 if (data[i].expected_result_count > 0) 979 ASSERT_EQ(data[i].output1, results[0]); 980 if (data[i].expected_result_count > 1) 981 ASSERT_EQ(data[i].output2, results[1]); 982 } 983} 984 985TEST(StringUtilTest, MatchPatternTest) { 986 EXPECT_TRUE(MatchPattern("www.google.com", "*.com")); 987 EXPECT_TRUE(MatchPattern("www.google.com", "*")); 988 EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org")); 989 EXPECT_TRUE(MatchPattern("Hello", "H?l?o")); 990 EXPECT_FALSE(MatchPattern("www.google.com", "http://*)")); 991 EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM")); 992 EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*")); 993 EXPECT_FALSE(MatchPattern("", "*.*")); 994 EXPECT_TRUE(MatchPattern("", "*")); 995 EXPECT_TRUE(MatchPattern("", "?")); 996 EXPECT_TRUE(MatchPattern("", "")); 997 EXPECT_FALSE(MatchPattern("Hello", "")); 998 EXPECT_TRUE(MatchPattern("Hello*", "Hello*")); 999 // Stop after a certain recursion depth. 1000 EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*")); 1001 1002 // Test UTF8 matching. 1003 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0")); 1004 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?.")); 1005 EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*")); 1006 // Invalid sequences should be handled as a single invalid character. 1007 EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?")); 1008 // If the pattern has invalid characters, it shouldn't match anything. 1009 EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80")); 1010 1011 // Test UTF16 character matching. 1012 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"), 1013 UTF8ToUTF16("*.com"))); 1014 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"), 1015 UTF8ToUTF16("He??o\\*1*"))); 1016} 1017 1018TEST(StringUtilTest, LcpyTest) { 1019 // Test the normal case where we fit in our buffer. 1020 { 1021 char dst[10]; 1022 wchar_t wdst[10]; 1023 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); 1024 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8)); 1025 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); 1026 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8)); 1027 } 1028 1029 // Test dst_size == 0, nothing should be written to |dst| and we should 1030 // have the equivalent of strlen(src). 1031 { 1032 char dst[2] = {1, 2}; 1033 wchar_t wdst[2] = {1, 2}; 1034 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0)); 1035 EXPECT_EQ(1, dst[0]); 1036 EXPECT_EQ(2, dst[1]); 1037 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0)); 1038#if defined(WCHAR_T_IS_UNSIGNED) 1039 EXPECT_EQ(1U, wdst[0]); 1040 EXPECT_EQ(2U, wdst[1]); 1041#else 1042 EXPECT_EQ(1, wdst[0]); 1043 EXPECT_EQ(2, wdst[1]); 1044#endif 1045 } 1046 1047 // Test the case were we _just_ competely fit including the null. 1048 { 1049 char dst[8]; 1050 wchar_t wdst[8]; 1051 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); 1052 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8)); 1053 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); 1054 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8)); 1055 } 1056 1057 // Test the case were we we are one smaller, so we can't fit the null. 1058 { 1059 char dst[7]; 1060 wchar_t wdst[7]; 1061 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); 1062 EXPECT_EQ(0, memcmp(dst, "abcdef", 7)); 1063 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); 1064 EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7)); 1065 } 1066 1067 // Test the case were we are just too small. 1068 { 1069 char dst[3]; 1070 wchar_t wdst[3]; 1071 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); 1072 EXPECT_EQ(0, memcmp(dst, "ab", 3)); 1073 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); 1074 EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3)); 1075 } 1076} 1077 1078TEST(StringUtilTest, WprintfFormatPortabilityTest) { 1079 struct TestData { 1080 const wchar_t* input; 1081 bool portable; 1082 } cases[] = { 1083 { L"%ls", true }, 1084 { L"%s", false }, 1085 { L"%S", false }, 1086 { L"%lS", false }, 1087 { L"Hello, %s", false }, 1088 { L"%lc", true }, 1089 { L"%c", false }, 1090 { L"%C", false }, 1091 { L"%lC", false }, 1092 { L"%ls %s", false }, 1093 { L"%s %ls", false }, 1094 { L"%s %ls %s", false }, 1095 { L"%f", true }, 1096 { L"%f %F", false }, 1097 { L"%d %D", false }, 1098 { L"%o %O", false }, 1099 { L"%u %U", false }, 1100 { L"%f %d %o %u", true }, 1101 { L"%-8d (%02.1f%)", true }, 1102 { L"% 10s", false }, 1103 { L"% 10ls", true } 1104 }; 1105 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { 1106 EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input)); 1107 } 1108} 1109 1110TEST(StringUtilTest, ElideString) { 1111 struct TestData { 1112 const wchar_t* input; 1113 int max_len; 1114 bool result; 1115 const wchar_t* output; 1116 } cases[] = { 1117 { L"Hello", 0, true, L"" }, 1118 { L"", 0, false, L"" }, 1119 { L"Hello, my name is Tom", 1, true, L"H" }, 1120 { L"Hello, my name is Tom", 2, true, L"He" }, 1121 { L"Hello, my name is Tom", 3, true, L"H.m" }, 1122 { L"Hello, my name is Tom", 4, true, L"H..m" }, 1123 { L"Hello, my name is Tom", 5, true, L"H...m" }, 1124 { L"Hello, my name is Tom", 6, true, L"He...m" }, 1125 { L"Hello, my name is Tom", 7, true, L"He...om" }, 1126 { L"Hello, my name is Tom", 10, true, L"Hell...Tom" }, 1127 { L"Hello, my name is Tom", 100, false, L"Hello, my name is Tom" } 1128 }; 1129 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { 1130 std::wstring output; 1131 EXPECT_EQ(cases[i].result, 1132 ElideString(cases[i].input, cases[i].max_len, &output)); 1133 EXPECT_TRUE(output == cases[i].output); 1134 } 1135} 1136 1137TEST(StringUtilTest, RemoveChars) { 1138 const char* kRemoveChars = "-/+*"; 1139 std::string input = "A-+bc/d!*"; 1140 EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input)); 1141 EXPECT_EQ("Abcd!", input); 1142 1143 // No characters match kRemoveChars. 1144 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input)); 1145 EXPECT_EQ("Abcd!", input); 1146 1147 // Empty string. 1148 input.clear(); 1149 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input)); 1150 EXPECT_EQ(std::string(), input); 1151} 1152 1153TEST(StringUtilTest, ContainsOnlyChars) { 1154 // Providing an empty list of characters should return false but for the empty 1155 // string. 1156 EXPECT_TRUE(ContainsOnlyChars("", "")); 1157 EXPECT_FALSE(ContainsOnlyChars("Hello", "")); 1158 1159 EXPECT_TRUE(ContainsOnlyChars("", "1234")); 1160 EXPECT_TRUE(ContainsOnlyChars("1", "1234")); 1161 EXPECT_TRUE(ContainsOnlyChars("1", "4321")); 1162 EXPECT_TRUE(ContainsOnlyChars("123", "4321")); 1163 EXPECT_FALSE(ContainsOnlyChars("123a", "4321")); 1164} 1165 1166} // namespace base 1167