1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include <algorithm> 6#include <string> 7 8#include "net/base/escape.h" 9 10#include "base/basictypes.h" 11#include "base/strings/string_util.h" 12#include "base/strings/stringprintf.h" 13#include "base/strings/utf_string_conversions.h" 14#include "testing/gtest/include/gtest/gtest.h" 15 16namespace net { 17namespace { 18 19struct EscapeCase { 20 const char* input; 21 const char* output; 22}; 23 24struct UnescapeURLCase { 25 const wchar_t* input; 26 UnescapeRule::Type rules; 27 const wchar_t* output; 28}; 29 30struct UnescapeURLCaseASCII { 31 const char* input; 32 UnescapeRule::Type rules; 33 const char* output; 34}; 35 36struct UnescapeAndDecodeCase { 37 const char* input; 38 39 // The expected output when run through UnescapeURL. 40 const char* url_unescaped; 41 42 // The expected output when run through UnescapeQuery. 43 const char* query_unescaped; 44 45 // The expected output when run through UnescapeAndDecodeURLComponent. 46 const wchar_t* decoded; 47}; 48 49struct AdjustOffsetCase { 50 const char* input; 51 size_t input_offset; 52 size_t output_offset; 53}; 54 55struct EscapeForHTMLCase { 56 const char* input; 57 const char* expected_output; 58}; 59 60TEST(EscapeTest, EscapeTextForFormSubmission) { 61 const EscapeCase escape_cases[] = { 62 {"foo", "foo"}, 63 {"foo bar", "foo+bar"}, 64 {"foo++", "foo%2B%2B"} 65 }; 66 for (size_t i = 0; i < arraysize(escape_cases); ++i) { 67 EscapeCase value = escape_cases[i]; 68 EXPECT_EQ(value.output, EscapeQueryParamValue(value.input, true)); 69 } 70 71 const EscapeCase escape_cases_no_plus[] = { 72 {"foo", "foo"}, 73 {"foo bar", "foo%20bar"}, 74 {"foo++", "foo%2B%2B"} 75 }; 76 for (size_t i = 0; i < arraysize(escape_cases_no_plus); ++i) { 77 EscapeCase value = escape_cases_no_plus[i]; 78 EXPECT_EQ(value.output, EscapeQueryParamValue(value.input, false)); 79 } 80 81 // Test all the values in we're supposed to be escaping. 82 const std::string no_escape( 83 "abcdefghijklmnopqrstuvwxyz" 84 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 85 "0123456789" 86 "!'()*-._~"); 87 for (int i = 0; i < 256; ++i) { 88 std::string in; 89 in.push_back(i); 90 std::string out = EscapeQueryParamValue(in, true); 91 if (0 == i) { 92 EXPECT_EQ(out, std::string("%00")); 93 } else if (32 == i) { 94 // Spaces are plus escaped like web forms. 95 EXPECT_EQ(out, std::string("+")); 96 } else if (no_escape.find(in) == std::string::npos) { 97 // Check %hex escaping 98 std::string expected = base::StringPrintf("%%%02X", i); 99 EXPECT_EQ(expected, out); 100 } else { 101 // No change for things in the no_escape list. 102 EXPECT_EQ(out, in); 103 } 104 } 105} 106 107TEST(EscapeTest, EscapePath) { 108 ASSERT_EQ( 109 // Most of the character space we care about, un-escaped 110 EscapePath( 111 "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;" 112 "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ" 113 "[\\]^_`abcdefghijklmnopqrstuvwxyz" 114 "{|}~\x7f\x80\xff"), 115 // Escaped 116 "%02%0A%1D%20!%22%23$%25&'()*+,-./0123456789%3A;" 117 "%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ" 118 "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz" 119 "%7B%7C%7D~%7F%80%FF"); 120} 121 122TEST(EscapeTest, DataURLWithAccentedCharacters) { 123 const std::string url = 124 "text/html;charset=utf-8,%3Chtml%3E%3Cbody%3ETonton,%20ton%20th%C3" 125 "%A9%20t'a-t-il%20%C3%B4t%C3%A9%20ta%20toux%20"; 126 127 base::OffsetAdjuster::Adjustments adjustments; 128 net::UnescapeAndDecodeUTF8URLComponentWithAdjustments( 129 url, UnescapeRule::SPACES, &adjustments); 130} 131 132TEST(EscapeTest, EscapeUrlEncodedData) { 133 ASSERT_EQ( 134 // Most of the character space we care about, un-escaped 135 EscapeUrlEncodedData( 136 "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;" 137 "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ" 138 "[\\]^_`abcdefghijklmnopqrstuvwxyz" 139 "{|}~\x7f\x80\xff", true), 140 // Escaped 141 "%02%0A%1D+!%22%23%24%25%26%27()*%2B,-./0123456789:%3B" 142 "%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ" 143 "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz" 144 "%7B%7C%7D~%7F%80%FF"); 145} 146 147TEST(EscapeTest, EscapeUrlEncodedDataSpace) { 148 ASSERT_EQ(EscapeUrlEncodedData("a b", true), "a+b"); 149 ASSERT_EQ(EscapeUrlEncodedData("a b", false), "a%20b"); 150} 151 152TEST(EscapeTest, UnescapeURLComponentASCII) { 153 const UnescapeURLCaseASCII unescape_cases[] = { 154 {"", UnescapeRule::NORMAL, ""}, 155 {"%2", UnescapeRule::NORMAL, "%2"}, 156 {"%%%%%%", UnescapeRule::NORMAL, "%%%%%%"}, 157 {"Don't escape anything", UnescapeRule::NORMAL, "Don't escape anything"}, 158 {"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"}, 159 {"Some%20random text %25%2dOK", UnescapeRule::NONE, 160 "Some%20random text %25%2dOK"}, 161 {"Some%20random text %25%2dOK", UnescapeRule::NORMAL, 162 "Some%20random text %25-OK"}, 163 {"Some%20random text %25%2dOK", UnescapeRule::SPACES, 164 "Some random text %25-OK"}, 165 {"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS, 166 "Some%20random text %-OK"}, 167 {"Some%20random text %25%2dOK", 168 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, 169 "Some random text %-OK"}, 170 {"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, "\xA0\xB1\xC2\xD3\xE4\xF5"}, 171 {"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"}, 172 // Certain URL-sensitive characters should not be unescaped unless asked. 173 {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES, 174 "Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"}, 175 {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", 176 UnescapeRule::URL_SPECIAL_CHARS, 177 "Hello%20%13%10world ## ?? == && %% ++"}, 178 // We can neither escape nor unescape '@' since some websites expect it to 179 // be preserved as either '@' or "%40". 180 // See http://b/996720 and http://crbug.com/23933 . 181 {"me@my%40example", UnescapeRule::NORMAL, "me@my%40example"}, 182 // Control characters. 183 {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS, 184 "%01%02%03%04%05%06%07%08%09 %"}, 185 {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS, 186 "\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"}, 187 {"Hello%20%13%10%02", UnescapeRule::SPACES, "Hello %13%10%02"}, 188 {"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, "Hello%20\x13\x10\x02"}, 189 }; 190 191 for (size_t i = 0; i < arraysize(unescape_cases); i++) { 192 std::string str(unescape_cases[i].input); 193 EXPECT_EQ(std::string(unescape_cases[i].output), 194 UnescapeURLComponent(str, unescape_cases[i].rules)); 195 } 196 197 // Test the NULL character unescaping (which wouldn't work above since those 198 // are just char pointers). 199 std::string input("Null"); 200 input.push_back(0); // Also have a NULL in the input. 201 input.append("%00%39Test"); 202 203 // When we're unescaping NULLs 204 std::string expected("Null"); 205 expected.push_back(0); 206 expected.push_back(0); 207 expected.append("9Test"); 208 EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS)); 209 210 // When we're not unescaping NULLs. 211 expected = "Null"; 212 expected.push_back(0); 213 expected.append("%009Test"); 214 EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL)); 215} 216 217TEST(EscapeTest, UnescapeURLComponent) { 218 const UnescapeURLCase unescape_cases[] = { 219 {L"", UnescapeRule::NORMAL, L""}, 220 {L"%2", UnescapeRule::NORMAL, L"%2"}, 221 {L"%%%%%%", UnescapeRule::NORMAL, L"%%%%%%"}, 222 {L"Don't escape anything", UnescapeRule::NORMAL, L"Don't escape anything"}, 223 {L"Invalid %escape %2", UnescapeRule::NORMAL, L"Invalid %escape %2"}, 224 {L"Some%20random text %25%2dOK", UnescapeRule::NONE, 225 L"Some%20random text %25%2dOK"}, 226 {L"Some%20random text %25%2dOK", UnescapeRule::NORMAL, 227 L"Some%20random text %25-OK"}, 228 {L"Some%20random text %25%E2%80", UnescapeRule::NORMAL, 229 L"Some%20random text %25\xE2\x80"}, 230 {L"Some%20random text %25%E2%80OK", UnescapeRule::NORMAL, 231 L"Some%20random text %25\xE2\x80OK"}, 232 {L"Some%20random text %25%E2%80%84OK", UnescapeRule::NORMAL, 233 L"Some%20random text %25\xE2\x80\x84OK"}, 234 235 // BiDi Control characters should not be unescaped. 236 {L"Some%20random text %25%D8%9COK", UnescapeRule::NORMAL, 237 L"Some%20random text %25%D8%9COK"}, 238 {L"Some%20random text %25%E2%80%8EOK", UnescapeRule::NORMAL, 239 L"Some%20random text %25%E2%80%8EOK"}, 240 {L"Some%20random text %25%E2%80%8FOK", UnescapeRule::NORMAL, 241 L"Some%20random text %25%E2%80%8FOK"}, 242 {L"Some%20random text %25%E2%80%AAOK", UnescapeRule::NORMAL, 243 L"Some%20random text %25%E2%80%AAOK"}, 244 {L"Some%20random text %25%E2%80%ABOK", UnescapeRule::NORMAL, 245 L"Some%20random text %25%E2%80%ABOK"}, 246 {L"Some%20random text %25%E2%80%AEOK", UnescapeRule::NORMAL, 247 L"Some%20random text %25%E2%80%AEOK"}, 248 {L"Some%20random text %25%E2%81%A6OK", UnescapeRule::NORMAL, 249 L"Some%20random text %25%E2%81%A6OK"}, 250 {L"Some%20random text %25%E2%81%A9OK", UnescapeRule::NORMAL, 251 L"Some%20random text %25%E2%81%A9OK"}, 252 253 {L"Some%20random text %25%2dOK", UnescapeRule::SPACES, 254 L"Some random text %25-OK"}, 255 {L"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS, 256 L"Some%20random text %-OK"}, 257 {L"Some%20random text %25%2dOK", 258 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, 259 L"Some random text %-OK"}, 260 {L"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, L"\xA0\xB1\xC2\xD3\xE4\xF5"}, 261 {L"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, L"\xAa\xBb\xCc\xDd\xEe\xFf"}, 262 // Certain URL-sensitive characters should not be unescaped unless asked. 263 {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES, 264 L"Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"}, 265 {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", 266 UnescapeRule::URL_SPECIAL_CHARS, 267 L"Hello%20%13%10world ## ?? == && %% ++"}, 268 // We can neither escape nor unescape '@' since some websites expect it to 269 // be preserved as either '@' or "%40". 270 // See http://b/996720 and http://crbug.com/23933 . 271 {L"me@my%40example", UnescapeRule::NORMAL, L"me@my%40example"}, 272 // Control characters. 273 {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS, 274 L"%01%02%03%04%05%06%07%08%09 %"}, 275 {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS, 276 L"\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"}, 277 {L"Hello%20%13%10%02", UnescapeRule::SPACES, L"Hello %13%10%02"}, 278 {L"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, 279 L"Hello%20\x13\x10\x02"}, 280 {L"Hello\x9824\x9827", UnescapeRule::CONTROL_CHARS, 281 L"Hello\x9824\x9827"}, 282 }; 283 284 for (size_t i = 0; i < arraysize(unescape_cases); i++) { 285 base::string16 str(base::WideToUTF16(unescape_cases[i].input)); 286 EXPECT_EQ(base::WideToUTF16(unescape_cases[i].output), 287 UnescapeURLComponent(str, unescape_cases[i].rules)); 288 } 289 290 // Test the NULL character unescaping (which wouldn't work above since those 291 // are just char pointers). 292 base::string16 input(base::WideToUTF16(L"Null")); 293 input.push_back(0); // Also have a NULL in the input. 294 input.append(base::WideToUTF16(L"%00%39Test")); 295 296 // When we're unescaping NULLs 297 base::string16 expected(base::WideToUTF16(L"Null")); 298 expected.push_back(0); 299 expected.push_back(0); 300 expected.append(base::ASCIIToUTF16("9Test")); 301 EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS)); 302 303 // When we're not unescaping NULLs. 304 expected = base::WideToUTF16(L"Null"); 305 expected.push_back(0); 306 expected.append(base::WideToUTF16(L"%009Test")); 307 EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL)); 308} 309 310TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponent) { 311 const UnescapeAndDecodeCase unescape_cases[] = { 312 { "%", 313 "%", 314 "%", 315 L"%"}, 316 { "+", 317 "+", 318 " ", 319 L"+"}, 320 { "%2+", 321 "%2+", 322 "%2 ", 323 L"%2+"}, 324 { "+%%%+%%%", 325 "+%%%+%%%", 326 " %%% %%%", 327 L"+%%%+%%%"}, 328 { "Don't escape anything", 329 "Don't escape anything", 330 "Don't escape anything", 331 L"Don't escape anything"}, 332 { "+Invalid %escape %2+", 333 "+Invalid %escape %2+", 334 " Invalid %escape %2 ", 335 L"+Invalid %escape %2+"}, 336 { "Some random text %25%2dOK", 337 "Some random text %25-OK", 338 "Some random text %25-OK", 339 L"Some random text %25-OK"}, 340 { "%01%02%03%04%05%06%07%08%09", 341 "%01%02%03%04%05%06%07%08%09", 342 "%01%02%03%04%05%06%07%08%09", 343 L"%01%02%03%04%05%06%07%08%09"}, 344 { "%E4%BD%A0+%E5%A5%BD", 345 "\xE4\xBD\xA0+\xE5\xA5\xBD", 346 "\xE4\xBD\xA0 \xE5\xA5\xBD", 347 L"\x4f60+\x597d"}, 348 { "%ED%ED", // Invalid UTF-8. 349 "\xED\xED", 350 "\xED\xED", 351 L"%ED%ED"}, // Invalid UTF-8 -> kept unescaped. 352 }; 353 354 for (size_t i = 0; i < arraysize(unescape_cases); i++) { 355 std::string unescaped = UnescapeURLComponent(unescape_cases[i].input, 356 UnescapeRule::NORMAL); 357 EXPECT_EQ(std::string(unescape_cases[i].url_unescaped), unescaped); 358 359 unescaped = UnescapeURLComponent(unescape_cases[i].input, 360 UnescapeRule::REPLACE_PLUS_WITH_SPACE); 361 EXPECT_EQ(std::string(unescape_cases[i].query_unescaped), unescaped); 362 363 // TODO: Need to test unescape_spaces and unescape_percent. 364 base::string16 decoded = UnescapeAndDecodeUTF8URLComponent( 365 unescape_cases[i].input, UnescapeRule::NORMAL); 366 EXPECT_EQ(base::WideToUTF16(unescape_cases[i].decoded), decoded); 367 } 368} 369 370TEST(EscapeTest, AdjustOffset) { 371 const AdjustOffsetCase adjust_cases[] = { 372 {"", 0, 0}, 373 {"test", 0, 0}, 374 {"test", 2, 2}, 375 {"test", 4, 4}, 376 {"test", std::string::npos, std::string::npos}, 377 {"%2dtest", 6, 4}, 378 {"%2dtest", 3, 1}, 379 {"%2dtest", 2, std::string::npos}, 380 {"%2dtest", 1, std::string::npos}, 381 {"%2dtest", 0, 0}, 382 {"test%2d", 2, 2}, 383 {"%E4%BD%A0+%E5%A5%BD", 9, 1}, 384 {"%E4%BD%A0+%E5%A5%BD", 6, std::string::npos}, 385 {"%E4%BD%A0+%E5%A5%BD", 0, 0}, 386 {"%E4%BD%A0+%E5%A5%BD", 10, 2}, 387 {"%E4%BD%A0+%E5%A5%BD", 19, 3}, 388 389 {"hi%41test%E4%BD%A0+%E5%A5%BD", 18, 8}, 390 {"hi%41test%E4%BD%A0+%E5%A5%BD", 15, std::string::npos}, 391 {"hi%41test%E4%BD%A0+%E5%A5%BD", 9, 7}, 392 {"hi%41test%E4%BD%A0+%E5%A5%BD", 19, 9}, 393 {"hi%41test%E4%BD%A0+%E5%A5%BD", 28, 10}, 394 {"hi%41test%E4%BD%A0+%E5%A5%BD", 0, 0}, 395 {"hi%41test%E4%BD%A0+%E5%A5%BD", 2, 2}, 396 {"hi%41test%E4%BD%A0+%E5%A5%BD", 3, std::string::npos}, 397 {"hi%41test%E4%BD%A0+%E5%A5%BD", 5, 3}, 398 399 {"%E4%BD%A0+%E5%A5%BDhi%41test", 9, 1}, 400 {"%E4%BD%A0+%E5%A5%BDhi%41test", 6, std::string::npos}, 401 {"%E4%BD%A0+%E5%A5%BDhi%41test", 0, 0}, 402 {"%E4%BD%A0+%E5%A5%BDhi%41test", 10, 2}, 403 {"%E4%BD%A0+%E5%A5%BDhi%41test", 19, 3}, 404 {"%E4%BD%A0+%E5%A5%BDhi%41test", 21, 5}, 405 {"%E4%BD%A0+%E5%A5%BDhi%41test", 22, std::string::npos}, 406 {"%E4%BD%A0+%E5%A5%BDhi%41test", 24, 6}, 407 {"%E4%BD%A0+%E5%A5%BDhi%41test", 28, 10}, 408 409 {"%ED%B0%80+%E5%A5%BD", 6, 6}, // not convertable to UTF-8 410 }; 411 412 for (size_t i = 0; i < arraysize(adjust_cases); i++) { 413 size_t offset = adjust_cases[i].input_offset; 414 base::OffsetAdjuster::Adjustments adjustments; 415 UnescapeAndDecodeUTF8URLComponentWithAdjustments( 416 adjust_cases[i].input, UnescapeRule::NORMAL, &adjustments); 417 base::OffsetAdjuster::AdjustOffset(adjustments, &offset); 418 EXPECT_EQ(adjust_cases[i].output_offset, offset) 419 << "input=" << adjust_cases[i].input 420 << " offset=" << adjust_cases[i].input_offset; 421 } 422} 423 424TEST(EscapeTest, EscapeForHTML) { 425 const EscapeForHTMLCase tests[] = { 426 { "hello", "hello" }, 427 { "<hello>", "<hello>" }, 428 { "don\'t mess with me", "don't mess with me" }, 429 }; 430 for (size_t i = 0; i < arraysize(tests); ++i) { 431 std::string result = EscapeForHTML(std::string(tests[i].input)); 432 EXPECT_EQ(std::string(tests[i].expected_output), result); 433 } 434} 435 436TEST(EscapeTest, UnescapeForHTML) { 437 const EscapeForHTMLCase tests[] = { 438 { "", "" }, 439 { "<hello>", "<hello>" }, 440 { "don't mess with me", "don\'t mess with me" }, 441 { "<>&"'", "<>&\"'" }, 442 { "& lt; & ; &; '", "& lt; & ; &; '" }, 443 { "&", "&" }, 444 { """, "\"" }, 445 { "'", "'" }, 446 { "<", "<" }, 447 { ">", ">" }, 448 { "& &", "& &" }, 449 }; 450 for (size_t i = 0; i < arraysize(tests); ++i) { 451 base::string16 result = UnescapeForHTML(base::ASCIIToUTF16(tests[i].input)); 452 EXPECT_EQ(base::ASCIIToUTF16(tests[i].expected_output), result); 453 } 454} 455 456 457} // namespace 458} // namespace net 459