1// Copyright 2013 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "base/strings/string_util.h" 6 7#include <math.h> 8#include <stdarg.h> 9 10#include <limits> 11#include <sstream> 12 13#include "base/basictypes.h" 14#include "base/strings/string16.h" 15#include "base/strings/utf_string_conversions.h" 16#include "testing/gmock/include/gmock/gmock.h" 17#include "testing/gtest/include/gtest/gtest.h" 18 19using ::testing::ElementsAre; 20 21namespace base { 22 23static const struct trim_case { 24 const wchar_t* input; 25 const TrimPositions positions; 26 const wchar_t* output; 27 const TrimPositions return_value; 28} trim_cases[] = { 29 {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING}, 30 {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING}, 31 {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL}, 32 {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE}, 33 {L"", TRIM_ALL, L"", TRIM_NONE}, 34 {L" ", TRIM_LEADING, L"", TRIM_LEADING}, 35 {L" ", TRIM_TRAILING, L"", TRIM_TRAILING}, 36 {L" ", TRIM_ALL, L"", TRIM_ALL}, 37 {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL}, 38 {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL}, 39}; 40 41static const struct trim_case_ascii { 42 const char* input; 43 const TrimPositions positions; 44 const char* output; 45 const TrimPositions return_value; 46} trim_cases_ascii[] = { 47 {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING}, 48 {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING}, 49 {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL}, 50 {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE}, 51 {"", TRIM_ALL, "", TRIM_NONE}, 52 {" ", TRIM_LEADING, "", TRIM_LEADING}, 53 {" ", TRIM_TRAILING, "", TRIM_TRAILING}, 54 {" ", TRIM_ALL, "", TRIM_ALL}, 55 {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL}, 56}; 57 58namespace { 59 60// Helper used to test TruncateUTF8ToByteSize. 61bool Truncated(const std::string& input, const size_t byte_size, 62 std::string* output) { 63 size_t prev = input.length(); 64 TruncateUTF8ToByteSize(input, byte_size, output); 65 return prev != output->length(); 66} 67 68} // namespace 69 70TEST(StringUtilTest, TruncateUTF8ToByteSize) { 71 std::string output; 72 73 // Empty strings and invalid byte_size arguments 74 EXPECT_FALSE(Truncated(std::string(), 0, &output)); 75 EXPECT_EQ(output, ""); 76 EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output)); 77 EXPECT_EQ(output, ""); 78 EXPECT_FALSE(Truncated("\xe1\x80\xbf", -1, &output)); 79 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output)); 80 81 // Testing the truncation of valid UTF8 correctly 82 EXPECT_TRUE(Truncated("abc", 2, &output)); 83 EXPECT_EQ(output, "ab"); 84 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output)); 85 EXPECT_EQ(output.compare("\xc2\x81"), 0); 86 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output)); 87 EXPECT_EQ(output.compare("\xc2\x81"), 0); 88 EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output)); 89 EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0); 90 91 { 92 const char array[] = "\x00\x00\xc2\x81\xc2\x81"; 93 const std::string array_string(array, arraysize(array)); 94 EXPECT_TRUE(Truncated(array_string, 4, &output)); 95 EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0); 96 } 97 98 { 99 const char array[] = "\x00\xc2\x81\xc2\x81"; 100 const std::string array_string(array, arraysize(array)); 101 EXPECT_TRUE(Truncated(array_string, 4, &output)); 102 EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0); 103 } 104 105 // Testing invalid UTF8 106 EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output)); 107 EXPECT_EQ(output.compare(""), 0); 108 EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output)); 109 EXPECT_EQ(output.compare(""), 0); 110 EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output)); 111 EXPECT_EQ(output.compare(""), 0); 112 113 // Testing invalid UTF8 mixed with valid UTF8 114 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output)); 115 EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0); 116 EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output)); 117 EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0); 118 EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf", 119 10, &output)); 120 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0); 121 EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0", 122 10, &output)); 123 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0); 124 EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output)); 125 EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0); 126 127 // Overlong sequences 128 EXPECT_TRUE(Truncated("\xc0\x80", 2, &output)); 129 EXPECT_EQ(output.compare(""), 0); 130 EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output)); 131 EXPECT_EQ(output.compare(""), 0); 132 EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output)); 133 EXPECT_EQ(output.compare(""), 0); 134 EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output)); 135 EXPECT_EQ(output.compare(""), 0); 136 EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output)); 137 EXPECT_EQ(output.compare(""), 0); 138 EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output)); 139 EXPECT_EQ(output.compare(""), 0); 140 EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output)); 141 EXPECT_EQ(output.compare(""), 0); 142 EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output)); 143 EXPECT_EQ(output.compare(""), 0); 144 EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output)); 145 EXPECT_EQ(output.compare(""), 0); 146 EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output)); 147 EXPECT_EQ(output.compare(""), 0); 148 EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output)); 149 EXPECT_EQ(output.compare(""), 0); 150 151 // Beyond U+10FFFF (the upper limit of Unicode codespace) 152 EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output)); 153 EXPECT_EQ(output.compare(""), 0); 154 EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output)); 155 EXPECT_EQ(output.compare(""), 0); 156 EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output)); 157 EXPECT_EQ(output.compare(""), 0); 158 159 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE) 160 EXPECT_TRUE(Truncated("\xfe\xff", 2, &output)); 161 EXPECT_EQ(output.compare(""), 0); 162 EXPECT_TRUE(Truncated("\xff\xfe", 2, &output)); 163 EXPECT_EQ(output.compare(""), 0); 164 165 { 166 const char array[] = "\x00\x00\xfe\xff"; 167 const std::string array_string(array, arraysize(array)); 168 EXPECT_TRUE(Truncated(array_string, 4, &output)); 169 EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0); 170 } 171 172 // Variants on the previous test 173 { 174 const char array[] = "\xff\xfe\x00\x00"; 175 const std::string array_string(array, 4); 176 EXPECT_FALSE(Truncated(array_string, 4, &output)); 177 EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0); 178 } 179 { 180 const char array[] = "\xff\x00\x00\xfe"; 181 const std::string array_string(array, arraysize(array)); 182 EXPECT_TRUE(Truncated(array_string, 4, &output)); 183 EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0); 184 } 185 186 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF> 187 EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output)); 188 EXPECT_EQ(output.compare(""), 0); 189 EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output)); 190 EXPECT_EQ(output.compare(""), 0); 191 EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output)); 192 EXPECT_EQ(output.compare(""), 0); 193 EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output)); 194 EXPECT_EQ(output.compare(""), 0); 195 EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output)); 196 EXPECT_EQ(output.compare(""), 0); 197 198 // Strings in legacy encodings that are valid in UTF-8, but 199 // are invalid as UTF-8 in real data. 200 EXPECT_TRUE(Truncated("caf\xe9", 4, &output)); 201 EXPECT_EQ(output.compare("caf"), 0); 202 EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output)); 203 EXPECT_EQ(output.compare(""), 0); 204 EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output)); 205 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0); 206 EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7, 207 &output)); 208 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0); 209 210 // Testing using the same string as input and output. 211 EXPECT_FALSE(Truncated(output, 4, &output)); 212 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0); 213 EXPECT_TRUE(Truncated(output, 3, &output)); 214 EXPECT_EQ(output.compare("\xa7\x41"), 0); 215 216 // "abc" with U+201[CD] in windows-125[0-8] 217 EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output)); 218 EXPECT_EQ(output.compare("\x93" "abc"), 0); 219 220 // U+0639 U+064E U+0644 U+064E in ISO-8859-6 221 EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output)); 222 EXPECT_EQ(output.compare(""), 0); 223 224 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7 225 EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output)); 226 EXPECT_EQ(output.compare(""), 0); 227} 228 229TEST(StringUtilTest, TrimWhitespace) { 230 string16 output; // Allow contents to carry over to next testcase 231 for (size_t i = 0; i < arraysize(trim_cases); ++i) { 232 const trim_case& value = trim_cases[i]; 233 EXPECT_EQ(value.return_value, 234 TrimWhitespace(WideToUTF16(value.input), value.positions, 235 &output)); 236 EXPECT_EQ(WideToUTF16(value.output), output); 237 } 238 239 // Test that TrimWhitespace() can take the same string for input and output 240 output = ASCIIToUTF16(" This is a test \r\n"); 241 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output)); 242 EXPECT_EQ(ASCIIToUTF16("This is a test"), output); 243 244 // Once more, but with a string of whitespace 245 output = ASCIIToUTF16(" \r\n"); 246 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output)); 247 EXPECT_EQ(string16(), output); 248 249 std::string output_ascii; 250 for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) { 251 const trim_case_ascii& value = trim_cases_ascii[i]; 252 EXPECT_EQ(value.return_value, 253 TrimWhitespace(value.input, value.positions, &output_ascii)); 254 EXPECT_EQ(value.output, output_ascii); 255 } 256} 257 258static const struct collapse_case { 259 const wchar_t* input; 260 const bool trim; 261 const wchar_t* output; 262} collapse_cases[] = { 263 {L" Google Video ", false, L"Google Video"}, 264 {L"Google Video", false, L"Google Video"}, 265 {L"", false, L""}, 266 {L" ", false, L""}, 267 {L"\t\rTest String\n", false, L"Test String"}, 268 {L"\x2002Test String\x00A0\x3000", false, L"Test String"}, 269 {L" Test \n \t String ", false, L"Test String"}, 270 {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"}, 271 {L" Test String", false, L"Test String"}, 272 {L"Test String ", false, L"Test String"}, 273 {L"Test String", false, L"Test String"}, 274 {L"", true, L""}, 275 {L"\n", true, L""}, 276 {L" \r ", true, L""}, 277 {L"\nFoo", true, L"Foo"}, 278 {L"\r Foo ", true, L"Foo"}, 279 {L" Foo bar ", true, L"Foo bar"}, 280 {L" \tFoo bar \n", true, L"Foo bar"}, 281 {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"}, 282}; 283 284TEST(StringUtilTest, CollapseWhitespace) { 285 for (size_t i = 0; i < arraysize(collapse_cases); ++i) { 286 const collapse_case& value = collapse_cases[i]; 287 EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim)); 288 } 289} 290 291static const struct collapse_case_ascii { 292 const char* input; 293 const bool trim; 294 const char* output; 295} collapse_cases_ascii[] = { 296 {" Google Video ", false, "Google Video"}, 297 {"Google Video", false, "Google Video"}, 298 {"", false, ""}, 299 {" ", false, ""}, 300 {"\t\rTest String\n", false, "Test String"}, 301 {" Test \n \t String ", false, "Test String"}, 302 {" Test String", false, "Test String"}, 303 {"Test String ", false, "Test String"}, 304 {"Test String", false, "Test String"}, 305 {"", true, ""}, 306 {"\n", true, ""}, 307 {" \r ", true, ""}, 308 {"\nFoo", true, "Foo"}, 309 {"\r Foo ", true, "Foo"}, 310 {" Foo bar ", true, "Foo bar"}, 311 {" \tFoo bar \n", true, "Foo bar"}, 312 {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"}, 313}; 314 315TEST(StringUtilTest, CollapseWhitespaceASCII) { 316 for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) { 317 const collapse_case_ascii& value = collapse_cases_ascii[i]; 318 EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim)); 319 } 320} 321 322TEST(StringUtilTest, ContainsOnlyWhitespaceASCII) { 323 EXPECT_TRUE(ContainsOnlyWhitespaceASCII(std::string())); 324 EXPECT_TRUE(ContainsOnlyWhitespaceASCII(" ")); 325 EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t")); 326 EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t \r \n ")); 327 EXPECT_FALSE(ContainsOnlyWhitespaceASCII("a")); 328 EXPECT_FALSE(ContainsOnlyWhitespaceASCII("\thello\r \n ")); 329} 330 331TEST(StringUtilTest, ContainsOnlyWhitespace) { 332 EXPECT_TRUE(ContainsOnlyWhitespace(string16())); 333 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(" "))); 334 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t"))); 335 EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t \r \n "))); 336 EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("a"))); 337 EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("\thello\r \n "))); 338} 339 340TEST(StringUtilTest, IsStringUTF8) { 341 EXPECT_TRUE(IsStringUTF8("abc")); 342 EXPECT_TRUE(IsStringUTF8("\xc2\x81")); 343 EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf")); 344 EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf")); 345 EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf")); 346 EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM 347 348 // surrogate code points 349 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf")); 350 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f")); 351 EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf")); 352 353 // overlong sequences 354 EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000 355 EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB" 356 EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000 357 EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080 358 EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff 359 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D 360 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091 361 EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800 362 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM) 363 EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F 364 EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5 365 366 // Beyond U+10FFFF (the upper limit of Unicode codespace) 367 EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000 368 EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes 369 EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes 370 371 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE) 372 EXPECT_FALSE(IsStringUTF8("\xfe\xff")); 373 EXPECT_FALSE(IsStringUTF8("\xff\xfe")); 374 EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4))); 375 EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00")); 376 377 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF> 378 EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE) 379 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // U+1FFFE 380 EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+10FFFF 381 EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0 382 EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF 383 // Strings in legacy encodings. We can certainly make up strings 384 // in a legacy encoding that are valid in UTF-8, but in real data, 385 // most of them are invalid as UTF-8. 386 EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1 387 EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC001 in EUC-KR 388 EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5 389 // "abc" with U+201[CD] in windows-125[0-8] 390 EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94")); 391 // U+0639 U+064E U+0644 U+064E in ISO-8859-6 392 EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee")); 393 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7 394 EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC")); 395 396 // Check that we support Embedded Nulls. The first uses the canonical UTF-8 397 // representation, and the second uses a 2-byte sequence. The second version 398 // is invalid UTF-8 since UTF-8 states that the shortest encoding for a 399 // given codepoint must be used. 400 static const char kEmbeddedNull[] = "embedded\0null"; 401 EXPECT_TRUE(IsStringUTF8( 402 std::string(kEmbeddedNull, sizeof(kEmbeddedNull)))); 403 EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000")); 404} 405 406TEST(StringUtilTest, ConvertASCII) { 407 static const char* char_cases[] = { 408 "Google Video", 409 "Hello, world\n", 410 "0123ABCDwxyz \a\b\t\r\n!+,.~" 411 }; 412 413 static const wchar_t* const wchar_cases[] = { 414 L"Google Video", 415 L"Hello, world\n", 416 L"0123ABCDwxyz \a\b\t\r\n!+,.~" 417 }; 418 419 for (size_t i = 0; i < arraysize(char_cases); ++i) { 420 EXPECT_TRUE(IsStringASCII(char_cases[i])); 421 std::wstring wide = ASCIIToWide(char_cases[i]); 422 EXPECT_EQ(wchar_cases[i], wide); 423 424 EXPECT_TRUE(IsStringASCII(wchar_cases[i])); 425 std::string ascii = WideToASCII(wchar_cases[i]); 426 EXPECT_EQ(char_cases[i], ascii); 427 } 428 429 EXPECT_FALSE(IsStringASCII("Google \x80Video")); 430 EXPECT_FALSE(IsStringASCII(L"Google \x80Video")); 431 432 // Convert empty strings. 433 std::wstring wempty; 434 std::string empty; 435 EXPECT_EQ(empty, WideToASCII(wempty)); 436 EXPECT_EQ(wempty, ASCIIToWide(empty)); 437 438 // Convert strings with an embedded NUL character. 439 const char chars_with_nul[] = "test\0string"; 440 const int length_with_nul = arraysize(chars_with_nul) - 1; 441 std::string string_with_nul(chars_with_nul, length_with_nul); 442 std::wstring wide_with_nul = ASCIIToWide(string_with_nul); 443 EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul), 444 wide_with_nul.length()); 445 std::string narrow_with_nul = WideToASCII(wide_with_nul); 446 EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul), 447 narrow_with_nul.length()); 448 EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul)); 449} 450 451TEST(StringUtilTest, ToUpperASCII) { 452 EXPECT_EQ('C', ToUpperASCII('C')); 453 EXPECT_EQ('C', ToUpperASCII('c')); 454 EXPECT_EQ('2', ToUpperASCII('2')); 455 456 EXPECT_EQ(L'C', ToUpperASCII(L'C')); 457 EXPECT_EQ(L'C', ToUpperASCII(L'c')); 458 EXPECT_EQ(L'2', ToUpperASCII(L'2')); 459 460 std::string in_place_a("Cc2"); 461 StringToUpperASCII(&in_place_a); 462 EXPECT_EQ("CC2", in_place_a); 463 464 std::wstring in_place_w(L"Cc2"); 465 StringToUpperASCII(&in_place_w); 466 EXPECT_EQ(L"CC2", in_place_w); 467 468 std::string original_a("Cc2"); 469 std::string upper_a = StringToUpperASCII(original_a); 470 EXPECT_EQ("CC2", upper_a); 471 472 std::wstring original_w(L"Cc2"); 473 std::wstring upper_w = StringToUpperASCII(original_w); 474 EXPECT_EQ(L"CC2", upper_w); 475} 476 477TEST(StringUtilTest, LowerCaseEqualsASCII) { 478 static const struct { 479 const wchar_t* src_w; 480 const char* src_a; 481 const char* dst; 482 } lowercase_cases[] = { 483 { L"FoO", "FoO", "foo" }, 484 { L"foo", "foo", "foo" }, 485 { L"FOO", "FOO", "foo" }, 486 }; 487 488 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) { 489 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w, 490 lowercase_cases[i].dst)); 491 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a, 492 lowercase_cases[i].dst)); 493 } 494} 495 496TEST(StringUtilTest, FormatBytesUnlocalized) { 497 static const struct { 498 int64 bytes; 499 const char* expected; 500 } cases[] = { 501 // Expected behavior: we show one post-decimal digit when we have 502 // under two pre-decimal digits, except in cases where it makes no 503 // sense (zero or bytes). 504 // Since we switch units once we cross the 1000 mark, this keeps 505 // the display of file sizes or bytes consistently around three 506 // digits. 507 {0, "0 B"}, 508 {512, "512 B"}, 509 {1024*1024, "1.0 MB"}, 510 {1024*1024*1024, "1.0 GB"}, 511 {10LL*1024*1024*1024, "10.0 GB"}, 512 {99LL*1024*1024*1024, "99.0 GB"}, 513 {105LL*1024*1024*1024, "105 GB"}, 514 {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"}, 515 {~(1LL<<63), "8192 PB"}, 516 517 {99*1024 + 103, "99.1 kB"}, 518 {1024*1024 + 103, "1.0 MB"}, 519 {1024*1024 + 205 * 1024, "1.2 MB"}, 520 {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"}, 521 {10LL*1024*1024*1024, "10.0 GB"}, 522 {100LL*1024*1024*1024, "100 GB"}, 523 }; 524 525 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { 526 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), 527 FormatBytesUnlocalized(cases[i].bytes)); 528 } 529} 530TEST(StringUtilTest, ReplaceSubstringsAfterOffset) { 531 static const struct { 532 const char* str; 533 string16::size_type start_offset; 534 const char* find_this; 535 const char* replace_with; 536 const char* expected; 537 } cases[] = { 538 {"aaa", 0, "a", "b", "bbb"}, 539 {"abb", 0, "ab", "a", "ab"}, 540 {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "}, 541 {"Not found", 0, "x", "0", "Not found"}, 542 {"Not found again", 5, "x", "0", "Not found again"}, 543 {" Making it much longer ", 0, " ", "Four score and seven years ago", 544 "Four score and seven years agoMakingFour score and seven years agoit" 545 "Four score and seven years agomuchFour score and seven years agolonger" 546 "Four score and seven years ago"}, 547 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"}, 548 {"Replace me only me once", 9, "me ", "", "Replace me only once"}, 549 {"abababab", 2, "ab", "c", "abccc"}, 550 }; 551 552 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) { 553 string16 str = ASCIIToUTF16(cases[i].str); 554 ReplaceSubstringsAfterOffset(&str, cases[i].start_offset, 555 ASCIIToUTF16(cases[i].find_this), 556 ASCIIToUTF16(cases[i].replace_with)); 557 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str); 558 } 559} 560 561TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) { 562 static const struct { 563 const char* str; 564 string16::size_type start_offset; 565 const char* find_this; 566 const char* replace_with; 567 const char* expected; 568 } cases[] = { 569 {"aaa", 0, "a", "b", "baa"}, 570 {"abb", 0, "ab", "a", "ab"}, 571 {"Removing some substrings inging", 0, "ing", "", 572 "Remov some substrings inging"}, 573 {"Not found", 0, "x", "0", "Not found"}, 574 {"Not found again", 5, "x", "0", "Not found again"}, 575 {" Making it much longer ", 0, " ", "Four score and seven years ago", 576 "Four score and seven years agoMaking it much longer "}, 577 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"}, 578 {"Replace me only me once", 4, "me ", "", "Replace only me once"}, 579 {"abababab", 2, "ab", "c", "abcabab"}, 580 }; 581 582 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) { 583 string16 str = ASCIIToUTF16(cases[i].str); 584 ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset, 585 ASCIIToUTF16(cases[i].find_this), 586 ASCIIToUTF16(cases[i].replace_with)); 587 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str); 588 } 589} 590 591TEST(StringUtilTest, HexDigitToInt) { 592 EXPECT_EQ(0, HexDigitToInt('0')); 593 EXPECT_EQ(1, HexDigitToInt('1')); 594 EXPECT_EQ(2, HexDigitToInt('2')); 595 EXPECT_EQ(3, HexDigitToInt('3')); 596 EXPECT_EQ(4, HexDigitToInt('4')); 597 EXPECT_EQ(5, HexDigitToInt('5')); 598 EXPECT_EQ(6, HexDigitToInt('6')); 599 EXPECT_EQ(7, HexDigitToInt('7')); 600 EXPECT_EQ(8, HexDigitToInt('8')); 601 EXPECT_EQ(9, HexDigitToInt('9')); 602 EXPECT_EQ(10, HexDigitToInt('A')); 603 EXPECT_EQ(11, HexDigitToInt('B')); 604 EXPECT_EQ(12, HexDigitToInt('C')); 605 EXPECT_EQ(13, HexDigitToInt('D')); 606 EXPECT_EQ(14, HexDigitToInt('E')); 607 EXPECT_EQ(15, HexDigitToInt('F')); 608 609 // Verify the lower case as well. 610 EXPECT_EQ(10, HexDigitToInt('a')); 611 EXPECT_EQ(11, HexDigitToInt('b')); 612 EXPECT_EQ(12, HexDigitToInt('c')); 613 EXPECT_EQ(13, HexDigitToInt('d')); 614 EXPECT_EQ(14, HexDigitToInt('e')); 615 EXPECT_EQ(15, HexDigitToInt('f')); 616} 617 618// This checks where we can use the assignment operator for a va_list. We need 619// a way to do this since Visual C doesn't support va_copy, but assignment on 620// va_list is not guaranteed to be a copy. See StringAppendVT which uses this 621// capability. 622static void VariableArgsFunc(const char* format, ...) { 623 va_list org; 624 va_start(org, format); 625 626 va_list dup; 627 GG_VA_COPY(dup, org); 628 int i1 = va_arg(org, int); 629 int j1 = va_arg(org, int); 630 char* s1 = va_arg(org, char*); 631 double d1 = va_arg(org, double); 632 va_end(org); 633 634 int i2 = va_arg(dup, int); 635 int j2 = va_arg(dup, int); 636 char* s2 = va_arg(dup, char*); 637 double d2 = va_arg(dup, double); 638 639 EXPECT_EQ(i1, i2); 640 EXPECT_EQ(j1, j2); 641 EXPECT_STREQ(s1, s2); 642 EXPECT_EQ(d1, d2); 643 644 va_end(dup); 645} 646 647TEST(StringUtilTest, VAList) { 648 VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21); 649} 650 651// Test for Tokenize 652template <typename STR> 653void TokenizeTest() { 654 std::vector<STR> r; 655 size_t size; 656 657 size = Tokenize(STR("This is a string"), STR(" "), &r); 658 EXPECT_EQ(4U, size); 659 ASSERT_EQ(4U, r.size()); 660 EXPECT_EQ(r[0], STR("This")); 661 EXPECT_EQ(r[1], STR("is")); 662 EXPECT_EQ(r[2], STR("a")); 663 EXPECT_EQ(r[3], STR("string")); 664 r.clear(); 665 666 size = Tokenize(STR("one,two,three"), STR(","), &r); 667 EXPECT_EQ(3U, size); 668 ASSERT_EQ(3U, r.size()); 669 EXPECT_EQ(r[0], STR("one")); 670 EXPECT_EQ(r[1], STR("two")); 671 EXPECT_EQ(r[2], STR("three")); 672 r.clear(); 673 674 size = Tokenize(STR("one,two:three;four"), STR(",:"), &r); 675 EXPECT_EQ(3U, size); 676 ASSERT_EQ(3U, r.size()); 677 EXPECT_EQ(r[0], STR("one")); 678 EXPECT_EQ(r[1], STR("two")); 679 EXPECT_EQ(r[2], STR("three;four")); 680 r.clear(); 681 682 size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r); 683 EXPECT_EQ(4U, size); 684 ASSERT_EQ(4U, r.size()); 685 EXPECT_EQ(r[0], STR("one")); 686 EXPECT_EQ(r[1], STR("two")); 687 EXPECT_EQ(r[2], STR("three")); 688 EXPECT_EQ(r[3], STR("four")); 689 r.clear(); 690 691 size = Tokenize(STR("one, two, three"), STR(","), &r); 692 EXPECT_EQ(3U, size); 693 ASSERT_EQ(3U, r.size()); 694 EXPECT_EQ(r[0], STR("one")); 695 EXPECT_EQ(r[1], STR(" two")); 696 EXPECT_EQ(r[2], STR(" three")); 697 r.clear(); 698 699 size = Tokenize(STR("one, two, three, "), STR(","), &r); 700 EXPECT_EQ(4U, size); 701 ASSERT_EQ(4U, r.size()); 702 EXPECT_EQ(r[0], STR("one")); 703 EXPECT_EQ(r[1], STR(" two")); 704 EXPECT_EQ(r[2], STR(" three")); 705 EXPECT_EQ(r[3], STR(" ")); 706 r.clear(); 707 708 size = Tokenize(STR("one, two, three,"), STR(","), &r); 709 EXPECT_EQ(3U, size); 710 ASSERT_EQ(3U, r.size()); 711 EXPECT_EQ(r[0], STR("one")); 712 EXPECT_EQ(r[1], STR(" two")); 713 EXPECT_EQ(r[2], STR(" three")); 714 r.clear(); 715 716 size = Tokenize(STR(), STR(","), &r); 717 EXPECT_EQ(0U, size); 718 ASSERT_EQ(0U, r.size()); 719 r.clear(); 720 721 size = Tokenize(STR(","), STR(","), &r); 722 EXPECT_EQ(0U, size); 723 ASSERT_EQ(0U, r.size()); 724 r.clear(); 725 726 size = Tokenize(STR(",;:."), STR(".:;,"), &r); 727 EXPECT_EQ(0U, size); 728 ASSERT_EQ(0U, r.size()); 729 r.clear(); 730 731 size = Tokenize(STR("\t\ta\t"), STR("\t"), &r); 732 EXPECT_EQ(1U, size); 733 ASSERT_EQ(1U, r.size()); 734 EXPECT_EQ(r[0], STR("a")); 735 r.clear(); 736 737 size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r); 738 EXPECT_EQ(2U, size); 739 ASSERT_EQ(2U, r.size()); 740 EXPECT_EQ(r[0], STR("\ta\t")); 741 EXPECT_EQ(r[1], STR("b\tcc")); 742 r.clear(); 743} 744 745TEST(StringUtilTest, TokenizeStdString) { 746 TokenizeTest<std::string>(); 747} 748 749TEST(StringUtilTest, TokenizeStringPiece) { 750 TokenizeTest<base::StringPiece>(); 751} 752 753// Test for JoinString 754TEST(StringUtilTest, JoinString) { 755 std::vector<std::string> in; 756 EXPECT_EQ("", JoinString(in, ',')); 757 758 in.push_back("a"); 759 EXPECT_EQ("a", JoinString(in, ',')); 760 761 in.push_back("b"); 762 in.push_back("c"); 763 EXPECT_EQ("a,b,c", JoinString(in, ',')); 764 765 in.push_back(std::string()); 766 EXPECT_EQ("a,b,c,", JoinString(in, ',')); 767 in.push_back(" "); 768 EXPECT_EQ("a|b|c|| ", JoinString(in, '|')); 769} 770 771// Test for JoinString overloaded with std::string separator 772TEST(StringUtilTest, JoinStringWithString) { 773 std::string separator(", "); 774 std::vector<std::string> parts; 775 EXPECT_EQ(std::string(), JoinString(parts, separator)); 776 777 parts.push_back("a"); 778 EXPECT_EQ("a", JoinString(parts, separator)); 779 780 parts.push_back("b"); 781 parts.push_back("c"); 782 EXPECT_EQ("a, b, c", JoinString(parts, separator)); 783 784 parts.push_back(std::string()); 785 EXPECT_EQ("a, b, c, ", JoinString(parts, separator)); 786 parts.push_back(" "); 787 EXPECT_EQ("a|b|c|| ", JoinString(parts, "|")); 788} 789 790// Test for JoinString overloaded with string16 separator 791TEST(StringUtilTest, JoinStringWithString16) { 792 string16 separator = ASCIIToUTF16(", "); 793 std::vector<string16> parts; 794 EXPECT_EQ(string16(), JoinString(parts, separator)); 795 796 parts.push_back(ASCIIToUTF16("a")); 797 EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts, separator)); 798 799 parts.push_back(ASCIIToUTF16("b")); 800 parts.push_back(ASCIIToUTF16("c")); 801 EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts, separator)); 802 803 parts.push_back(ASCIIToUTF16("")); 804 EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts, separator)); 805 parts.push_back(ASCIIToUTF16(" ")); 806 EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts, ASCIIToUTF16("|"))); 807} 808 809TEST(StringUtilTest, StartsWith) { 810 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true)); 811 EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true)); 812 EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false)); 813 EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false)); 814 EXPECT_FALSE(StartsWithASCII("java", "javascript", true)); 815 EXPECT_FALSE(StartsWithASCII("java", "javascript", false)); 816 EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", false)); 817 EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", true)); 818 EXPECT_TRUE(StartsWithASCII("java", std::string(), false)); 819 EXPECT_TRUE(StartsWithASCII("java", std::string(), true)); 820 821 EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", true)); 822 EXPECT_FALSE(StartsWith(L"JavaScript:url", L"javascript", true)); 823 EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", false)); 824 EXPECT_TRUE(StartsWith(L"JavaScript:url", L"javascript", false)); 825 EXPECT_FALSE(StartsWith(L"java", L"javascript", true)); 826 EXPECT_FALSE(StartsWith(L"java", L"javascript", false)); 827 EXPECT_FALSE(StartsWith(std::wstring(), L"javascript", false)); 828 EXPECT_FALSE(StartsWith(std::wstring(), L"javascript", true)); 829 EXPECT_TRUE(StartsWith(L"java", std::wstring(), false)); 830 EXPECT_TRUE(StartsWith(L"java", std::wstring(), true)); 831} 832 833TEST(StringUtilTest, EndsWith) { 834 EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", true)); 835 EXPECT_FALSE(EndsWith(L"Foo.Plugin", L".plugin", true)); 836 EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", false)); 837 EXPECT_TRUE(EndsWith(L"Foo.Plugin", L".plugin", false)); 838 EXPECT_FALSE(EndsWith(L".plug", L".plugin", true)); 839 EXPECT_FALSE(EndsWith(L".plug", L".plugin", false)); 840 EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", true)); 841 EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", false)); 842 EXPECT_FALSE(EndsWith(std::wstring(), L".plugin", false)); 843 EXPECT_FALSE(EndsWith(std::wstring(), L".plugin", true)); 844 EXPECT_TRUE(EndsWith(L"Foo.plugin", std::wstring(), false)); 845 EXPECT_TRUE(EndsWith(L"Foo.plugin", std::wstring(), true)); 846 EXPECT_TRUE(EndsWith(L".plugin", L".plugin", false)); 847 EXPECT_TRUE(EndsWith(L".plugin", L".plugin", true)); 848 EXPECT_TRUE(EndsWith(std::wstring(), std::wstring(), false)); 849 EXPECT_TRUE(EndsWith(std::wstring(), std::wstring(), true)); 850} 851 852TEST(StringUtilTest, GetStringFWithOffsets) { 853 std::vector<string16> subst; 854 subst.push_back(ASCIIToUTF16("1")); 855 subst.push_back(ASCIIToUTF16("2")); 856 std::vector<size_t> offsets; 857 858 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."), 859 subst, 860 &offsets); 861 EXPECT_EQ(2U, offsets.size()); 862 EXPECT_EQ(7U, offsets[0]); 863 EXPECT_EQ(25U, offsets[1]); 864 offsets.clear(); 865 866 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."), 867 subst, 868 &offsets); 869 EXPECT_EQ(2U, offsets.size()); 870 EXPECT_EQ(25U, offsets[0]); 871 EXPECT_EQ(7U, offsets[1]); 872 offsets.clear(); 873} 874 875TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) { 876 // Test whether replacestringplaceholders works as expected when there 877 // are fewer inputs than outputs. 878 std::vector<string16> subst; 879 subst.push_back(ASCIIToUTF16("9a")); 880 subst.push_back(ASCIIToUTF16("8b")); 881 subst.push_back(ASCIIToUTF16("7c")); 882 883 string16 formatted = 884 ReplaceStringPlaceholders( 885 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL); 886 887 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci")); 888} 889 890TEST(StringUtilTest, ReplaceStringPlaceholders) { 891 std::vector<string16> subst; 892 subst.push_back(ASCIIToUTF16("9a")); 893 subst.push_back(ASCIIToUTF16("8b")); 894 subst.push_back(ASCIIToUTF16("7c")); 895 subst.push_back(ASCIIToUTF16("6d")); 896 subst.push_back(ASCIIToUTF16("5e")); 897 subst.push_back(ASCIIToUTF16("4f")); 898 subst.push_back(ASCIIToUTF16("3g")); 899 subst.push_back(ASCIIToUTF16("2h")); 900 subst.push_back(ASCIIToUTF16("1i")); 901 902 string16 formatted = 903 ReplaceStringPlaceholders( 904 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL); 905 906 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii")); 907} 908 909TEST(StringUtilTest, ReplaceStringPlaceholdersMoreThan9Replacements) { 910 std::vector<string16> subst; 911 subst.push_back(ASCIIToUTF16("9a")); 912 subst.push_back(ASCIIToUTF16("8b")); 913 subst.push_back(ASCIIToUTF16("7c")); 914 subst.push_back(ASCIIToUTF16("6d")); 915 subst.push_back(ASCIIToUTF16("5e")); 916 subst.push_back(ASCIIToUTF16("4f")); 917 subst.push_back(ASCIIToUTF16("3g")); 918 subst.push_back(ASCIIToUTF16("2h")); 919 subst.push_back(ASCIIToUTF16("1i")); 920 subst.push_back(ASCIIToUTF16("0j")); 921 subst.push_back(ASCIIToUTF16("-1k")); 922 subst.push_back(ASCIIToUTF16("-2l")); 923 subst.push_back(ASCIIToUTF16("-3m")); 924 subst.push_back(ASCIIToUTF16("-4n")); 925 926 string16 formatted = 927 ReplaceStringPlaceholders( 928 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i," 929 "$10j,$11k,$12l,$13m,$14n,$1"), subst, NULL); 930 931 EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh," 932 "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a")); 933} 934 935TEST(StringUtilTest, StdStringReplaceStringPlaceholders) { 936 std::vector<std::string> subst; 937 subst.push_back("9a"); 938 subst.push_back("8b"); 939 subst.push_back("7c"); 940 subst.push_back("6d"); 941 subst.push_back("5e"); 942 subst.push_back("4f"); 943 subst.push_back("3g"); 944 subst.push_back("2h"); 945 subst.push_back("1i"); 946 947 std::string formatted = 948 ReplaceStringPlaceholders( 949 "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL); 950 951 EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"); 952} 953 954TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) { 955 std::vector<std::string> subst; 956 subst.push_back("a"); 957 subst.push_back("b"); 958 subst.push_back("c"); 959 EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL), 960 "$1 $$2 $$$3"); 961} 962 963TEST(StringUtilTest, MatchPatternTest) { 964 EXPECT_TRUE(MatchPattern("www.google.com", "*.com")); 965 EXPECT_TRUE(MatchPattern("www.google.com", "*")); 966 EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org")); 967 EXPECT_TRUE(MatchPattern("Hello", "H?l?o")); 968 EXPECT_FALSE(MatchPattern("www.google.com", "http://*)")); 969 EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM")); 970 EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*")); 971 EXPECT_FALSE(MatchPattern("", "*.*")); 972 EXPECT_TRUE(MatchPattern("", "*")); 973 EXPECT_TRUE(MatchPattern("", "?")); 974 EXPECT_TRUE(MatchPattern("", "")); 975 EXPECT_FALSE(MatchPattern("Hello", "")); 976 EXPECT_TRUE(MatchPattern("Hello*", "Hello*")); 977 // Stop after a certain recursion depth. 978 EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*")); 979 980 // Test UTF8 matching. 981 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0")); 982 EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?.")); 983 EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*")); 984 // Invalid sequences should be handled as a single invalid character. 985 EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?")); 986 // If the pattern has invalid characters, it shouldn't match anything. 987 EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80")); 988 989 // Test UTF16 character matching. 990 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"), 991 UTF8ToUTF16("*.com"))); 992 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"), 993 UTF8ToUTF16("He??o\\*1*"))); 994 995 // This test verifies that consecutive wild cards are collapsed into 1 996 // wildcard (when this doesn't occur, MatchPattern reaches it's maximum 997 // recursion depth). 998 EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"), 999 UTF8ToUTF16("He********************************o"))); 1000} 1001 1002TEST(StringUtilTest, LcpyTest) { 1003 // Test the normal case where we fit in our buffer. 1004 { 1005 char dst[10]; 1006 wchar_t wdst[10]; 1007 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); 1008 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8)); 1009 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); 1010 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8)); 1011 } 1012 1013 // Test dst_size == 0, nothing should be written to |dst| and we should 1014 // have the equivalent of strlen(src). 1015 { 1016 char dst[2] = {1, 2}; 1017 wchar_t wdst[2] = {1, 2}; 1018 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0)); 1019 EXPECT_EQ(1, dst[0]); 1020 EXPECT_EQ(2, dst[1]); 1021 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0)); 1022#if defined(WCHAR_T_IS_UNSIGNED) 1023 EXPECT_EQ(1U, wdst[0]); 1024 EXPECT_EQ(2U, wdst[1]); 1025#else 1026 EXPECT_EQ(1, wdst[0]); 1027 EXPECT_EQ(2, wdst[1]); 1028#endif 1029 } 1030 1031 // Test the case were we _just_ competely fit including the null. 1032 { 1033 char dst[8]; 1034 wchar_t wdst[8]; 1035 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); 1036 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8)); 1037 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); 1038 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8)); 1039 } 1040 1041 // Test the case were we we are one smaller, so we can't fit the null. 1042 { 1043 char dst[7]; 1044 wchar_t wdst[7]; 1045 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); 1046 EXPECT_EQ(0, memcmp(dst, "abcdef", 7)); 1047 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); 1048 EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7)); 1049 } 1050 1051 // Test the case were we are just too small. 1052 { 1053 char dst[3]; 1054 wchar_t wdst[3]; 1055 EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst))); 1056 EXPECT_EQ(0, memcmp(dst, "ab", 3)); 1057 EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst))); 1058 EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3)); 1059 } 1060} 1061 1062TEST(StringUtilTest, WprintfFormatPortabilityTest) { 1063 static const struct { 1064 const wchar_t* input; 1065 bool portable; 1066 } cases[] = { 1067 { L"%ls", true }, 1068 { L"%s", false }, 1069 { L"%S", false }, 1070 { L"%lS", false }, 1071 { L"Hello, %s", false }, 1072 { L"%lc", true }, 1073 { L"%c", false }, 1074 { L"%C", false }, 1075 { L"%lC", false }, 1076 { L"%ls %s", false }, 1077 { L"%s %ls", false }, 1078 { L"%s %ls %s", false }, 1079 { L"%f", true }, 1080 { L"%f %F", false }, 1081 { L"%d %D", false }, 1082 { L"%o %O", false }, 1083 { L"%u %U", false }, 1084 { L"%f %d %o %u", true }, 1085 { L"%-8d (%02.1f%)", true }, 1086 { L"% 10s", false }, 1087 { L"% 10ls", true } 1088 }; 1089 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) 1090 EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input)); 1091} 1092 1093TEST(StringUtilTest, RemoveChars) { 1094 const char* kRemoveChars = "-/+*"; 1095 std::string input = "A-+bc/d!*"; 1096 EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input)); 1097 EXPECT_EQ("Abcd!", input); 1098 1099 // No characters match kRemoveChars. 1100 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input)); 1101 EXPECT_EQ("Abcd!", input); 1102 1103 // Empty string. 1104 input.clear(); 1105 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input)); 1106 EXPECT_EQ(std::string(), input); 1107} 1108 1109TEST(StringUtilTest, ReplaceChars) { 1110 struct TestData { 1111 const char* input; 1112 const char* replace_chars; 1113 const char* replace_with; 1114 const char* output; 1115 bool result; 1116 } cases[] = { 1117 { "", "", "", "", false }, 1118 { "test", "", "", "test", false }, 1119 { "test", "", "!", "test", false }, 1120 { "test", "z", "!", "test", false }, 1121 { "test", "e", "!", "t!st", true }, 1122 { "test", "e", "!?", "t!?st", true }, 1123 { "test", "ez", "!", "t!st", true }, 1124 { "test", "zed", "!?", "t!?st", true }, 1125 { "test", "t", "!?", "!?es!?", true }, 1126 { "test", "et", "!>", "!>!>s!>", true }, 1127 { "test", "zest", "!", "!!!!", true }, 1128 { "test", "szt", "!", "!e!!", true }, 1129 { "test", "t", "test", "testestest", true }, 1130 }; 1131 1132 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { 1133 std::string output; 1134 bool result = ReplaceChars(cases[i].input, 1135 cases[i].replace_chars, 1136 cases[i].replace_with, 1137 &output); 1138 EXPECT_EQ(cases[i].result, result); 1139 EXPECT_EQ(cases[i].output, output); 1140 } 1141} 1142 1143TEST(StringUtilTest, ContainsOnlyChars) { 1144 // Providing an empty list of characters should return false but for the empty 1145 // string. 1146 EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string())); 1147 EXPECT_FALSE(ContainsOnlyChars("Hello", std::string())); 1148 1149 EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234")); 1150 EXPECT_TRUE(ContainsOnlyChars("1", "1234")); 1151 EXPECT_TRUE(ContainsOnlyChars("1", "4321")); 1152 EXPECT_TRUE(ContainsOnlyChars("123", "4321")); 1153 EXPECT_FALSE(ContainsOnlyChars("123a", "4321")); 1154} 1155 1156class WriteIntoTest : public testing::Test { 1157 protected: 1158 static void WritesCorrectly(size_t num_chars) { 1159 std::string buffer; 1160 char kOriginal[] = "supercali"; 1161 strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars); 1162 // Using std::string(buffer.c_str()) instead of |buffer| truncates the 1163 // string at the first \0. 1164 EXPECT_EQ(std::string(kOriginal, 1165 std::min(num_chars, arraysize(kOriginal) - 1)), 1166 std::string(buffer.c_str())); 1167 EXPECT_EQ(num_chars, buffer.size()); 1168 } 1169}; 1170 1171TEST_F(WriteIntoTest, WriteInto) { 1172 // Validate that WriteInto reserves enough space and 1173 // sizes a string correctly. 1174 WritesCorrectly(1); 1175 WritesCorrectly(2); 1176 WritesCorrectly(5000); 1177 1178 // Validate that WriteInto doesn't modify other strings 1179 // when using a Copy-on-Write implementation. 1180 const char kLive[] = "live"; 1181 const char kDead[] = "dead"; 1182 const std::string live = kLive; 1183 std::string dead = live; 1184 strncpy(WriteInto(&dead, 5), kDead, 4); 1185 EXPECT_EQ(kDead, dead); 1186 EXPECT_EQ(4u, dead.size()); 1187 EXPECT_EQ(kLive, live); 1188 EXPECT_EQ(4u, live.size()); 1189} 1190 1191} // namespace base 1192