1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <algorithm>
6#include <string>
7
8#include "net/base/escape.h"
9
10#include "base/basictypes.h"
11#include "base/strings/string_util.h"
12#include "base/strings/stringprintf.h"
13#include "base/strings/utf_string_conversions.h"
14#include "testing/gtest/include/gtest/gtest.h"
15
16namespace net {
17namespace {
18
// One row of an escaping table used by the EscapeTextForFormSubmission test:
// a raw string and the text EscapeQueryParamValue() is expected to return
// for it.
struct EscapeCase {
  // Unescaped text handed to the function under test.
  const char* input;
  // Expected escaped result.
  const char* output;
};
23
// Wide-string row for the UnescapeURLComponent test. Both strings are run
// through base::WideToUTF16() before being used.
struct UnescapeURLCase {
  // Escaped text to unescape.
  const wchar_t* input;
  // UnescapeRule flags passed to UnescapeURLComponent().
  UnescapeRule::Type rules;
  // Expected unescaped result.
  const wchar_t* output;
};
29
// Narrow-string row for the UnescapeURLComponentASCII test.
struct UnescapeURLCaseASCII {
  // Escaped text to unescape.
  const char* input;
  // UnescapeRule flags passed to UnescapeURLComponent().
  UnescapeRule::Type rules;
  // Expected unescaped result.
  const char* output;
};
35
// Row for the UnescapeAndDecodeUTF8URLComponent test: one escaped input and
// the three results expected from the three calls that test makes.
struct UnescapeAndDecodeCase {
  // Escaped text fed to every call.
  const char* input;

  // The expected output of UnescapeURLComponent() with UnescapeRule::NORMAL.
  const char* url_unescaped;

  // The expected output of UnescapeURLComponent() with
  // UnescapeRule::REPLACE_PLUS_WITH_SPACE.
  const char* query_unescaped;

  // The expected output of UnescapeAndDecodeUTF8URLComponent() with
  // UnescapeRule::NORMAL (compared after base::WideToUTF16()).
  const wchar_t* decoded;
};
48
// Row for the AdjustOffset test: an escaped string plus an offset into it and
// the offset expected after the recorded adjustments have been applied.
struct AdjustOffsetCase {
  // Escaped text passed to UnescapeAndDecodeUTF8URLComponentWithAdjustments().
  const char* input;
  // Offset before adjustment; the table also uses std::string::npos here.
  size_t input_offset;
  // Expected offset after base::OffsetAdjuster::AdjustOffset(); the table
  // uses std::string::npos for several rows.
  size_t output_offset;
};
54
// Row shared by the EscapeForHTML and UnescapeForHTML tests: the text given
// to the function under test and the text it is expected to return.
struct EscapeForHTMLCase {
  // Text handed to EscapeForHTML() / UnescapeForHTML().
  const char* input;
  // Expected result of that call.
  const char* expected_output;
};
59
60TEST(EscapeTest, EscapeTextForFormSubmission) {
61  const EscapeCase escape_cases[] = {
62    {"foo", "foo"},
63    {"foo bar", "foo+bar"},
64    {"foo++", "foo%2B%2B"}
65  };
66  for (size_t i = 0; i < arraysize(escape_cases); ++i) {
67    EscapeCase value = escape_cases[i];
68    EXPECT_EQ(value.output, EscapeQueryParamValue(value.input, true));
69  }
70
71  const EscapeCase escape_cases_no_plus[] = {
72    {"foo", "foo"},
73    {"foo bar", "foo%20bar"},
74    {"foo++", "foo%2B%2B"}
75  };
76  for (size_t i = 0; i < arraysize(escape_cases_no_plus); ++i) {
77    EscapeCase value = escape_cases_no_plus[i];
78    EXPECT_EQ(value.output, EscapeQueryParamValue(value.input, false));
79  }
80
81  // Test all the values in we're supposed to be escaping.
82  const std::string no_escape(
83    "abcdefghijklmnopqrstuvwxyz"
84    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
85    "0123456789"
86    "!'()*-._~");
87  for (int i = 0; i < 256; ++i) {
88    std::string in;
89    in.push_back(i);
90    std::string out = EscapeQueryParamValue(in, true);
91    if (0 == i) {
92      EXPECT_EQ(out, std::string("%00"));
93    } else if (32 == i) {
94      // Spaces are plus escaped like web forms.
95      EXPECT_EQ(out, std::string("+"));
96    } else if (no_escape.find(in) == std::string::npos) {
97      // Check %hex escaping
98      std::string expected = base::StringPrintf("%%%02X", i);
99      EXPECT_EQ(expected, out);
100    } else {
101      // No change for things in the no_escape list.
102      EXPECT_EQ(out, in);
103    }
104  }
105}
106
107TEST(EscapeTest, EscapePath) {
108  ASSERT_EQ(
109    // Most of the character space we care about, un-escaped
110    EscapePath(
111      "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
112      "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
113      "[\\]^_`abcdefghijklmnopqrstuvwxyz"
114      "{|}~\x7f\x80\xff"),
115    // Escaped
116    "%02%0A%1D%20!%22%23$%25&'()*+,-./0123456789%3A;"
117    "%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
118    "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
119    "%7B%7C%7D~%7F%80%FF");
120}
121
122TEST(EscapeTest, DataURLWithAccentedCharacters) {
123  const std::string url =
124      "text/html;charset=utf-8,%3Chtml%3E%3Cbody%3ETonton,%20ton%20th%C3"
125      "%A9%20t'a-t-il%20%C3%B4t%C3%A9%20ta%20toux%20";
126
127  base::OffsetAdjuster::Adjustments adjustments;
128  net::UnescapeAndDecodeUTF8URLComponentWithAdjustments(
129      url, UnescapeRule::SPACES, &adjustments);
130}
131
132TEST(EscapeTest, EscapeUrlEncodedData) {
133  ASSERT_EQ(
134    // Most of the character space we care about, un-escaped
135    EscapeUrlEncodedData(
136      "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
137      "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
138      "[\\]^_`abcdefghijklmnopqrstuvwxyz"
139      "{|}~\x7f\x80\xff", true),
140    // Escaped
141    "%02%0A%1D+!%22%23%24%25%26%27()*%2B,-./0123456789:%3B"
142    "%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ"
143    "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
144    "%7B%7C%7D~%7F%80%FF");
145}
146
147TEST(EscapeTest, EscapeUrlEncodedDataSpace) {
148  ASSERT_EQ(EscapeUrlEncodedData("a b", true), "a+b");
149  ASSERT_EQ(EscapeUrlEncodedData("a b", false), "a%20b");
150}
151
152TEST(EscapeTest, UnescapeURLComponentASCII) {
153  const UnescapeURLCaseASCII unescape_cases[] = {
154    {"", UnescapeRule::NORMAL, ""},
155    {"%2", UnescapeRule::NORMAL, "%2"},
156    {"%%%%%%", UnescapeRule::NORMAL, "%%%%%%"},
157    {"Don't escape anything", UnescapeRule::NORMAL, "Don't escape anything"},
158    {"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"},
159    {"Some%20random text %25%2dOK", UnescapeRule::NONE,
160     "Some%20random text %25%2dOK"},
161    {"Some%20random text %25%2dOK", UnescapeRule::NORMAL,
162     "Some%20random text %25-OK"},
163    {"Some%20random text %25%2dOK", UnescapeRule::SPACES,
164     "Some random text %25-OK"},
165    {"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS,
166     "Some%20random text %-OK"},
167    {"Some%20random text %25%2dOK",
168     UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
169     "Some random text %-OK"},
170    {"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, "\xA0\xB1\xC2\xD3\xE4\xF5"},
171    {"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"},
172    // Certain URL-sensitive characters should not be unescaped unless asked.
173    {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES,
174     "Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"},
175    {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
176     UnescapeRule::URL_SPECIAL_CHARS,
177     "Hello%20%13%10world ## ?? == && %% ++"},
178    // We can neither escape nor unescape '@' since some websites expect it to
179    // be preserved as either '@' or "%40".
180    // See http://b/996720 and http://crbug.com/23933 .
181    {"me@my%40example", UnescapeRule::NORMAL, "me@my%40example"},
182    // Control characters.
183    {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS,
184     "%01%02%03%04%05%06%07%08%09 %"},
185    {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS,
186     "\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"},
187    {"Hello%20%13%10%02", UnescapeRule::SPACES, "Hello %13%10%02"},
188    {"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, "Hello%20\x13\x10\x02"},
189  };
190
191  for (size_t i = 0; i < arraysize(unescape_cases); i++) {
192    std::string str(unescape_cases[i].input);
193    EXPECT_EQ(std::string(unescape_cases[i].output),
194              UnescapeURLComponent(str, unescape_cases[i].rules));
195  }
196
197  // Test the NULL character unescaping (which wouldn't work above since those
198  // are just char pointers).
199  std::string input("Null");
200  input.push_back(0);  // Also have a NULL in the input.
201  input.append("%00%39Test");
202
203  // When we're unescaping NULLs
204  std::string expected("Null");
205  expected.push_back(0);
206  expected.push_back(0);
207  expected.append("9Test");
208  EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS));
209
210  // When we're not unescaping NULLs.
211  expected = "Null";
212  expected.push_back(0);
213  expected.append("%009Test");
214  EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL));
215}
216
217TEST(EscapeTest, UnescapeURLComponent) {
218  const UnescapeURLCase unescape_cases[] = {
219    {L"", UnescapeRule::NORMAL, L""},
220    {L"%2", UnescapeRule::NORMAL, L"%2"},
221    {L"%%%%%%", UnescapeRule::NORMAL, L"%%%%%%"},
222    {L"Don't escape anything", UnescapeRule::NORMAL, L"Don't escape anything"},
223    {L"Invalid %escape %2", UnescapeRule::NORMAL, L"Invalid %escape %2"},
224    {L"Some%20random text %25%2dOK", UnescapeRule::NONE,
225     L"Some%20random text %25%2dOK"},
226    {L"Some%20random text %25%2dOK", UnescapeRule::NORMAL,
227     L"Some%20random text %25-OK"},
228    {L"Some%20random text %25%E2%80", UnescapeRule::NORMAL,
229     L"Some%20random text %25\xE2\x80"},
230    {L"Some%20random text %25%E2%80OK", UnescapeRule::NORMAL,
231     L"Some%20random text %25\xE2\x80OK"},
232    {L"Some%20random text %25%E2%80%84OK", UnescapeRule::NORMAL,
233     L"Some%20random text %25\xE2\x80\x84OK"},
234
235    // BiDi Control characters should not be unescaped.
236    {L"Some%20random text %25%D8%9COK", UnescapeRule::NORMAL,
237     L"Some%20random text %25%D8%9COK"},
238    {L"Some%20random text %25%E2%80%8EOK", UnescapeRule::NORMAL,
239     L"Some%20random text %25%E2%80%8EOK"},
240    {L"Some%20random text %25%E2%80%8FOK", UnescapeRule::NORMAL,
241     L"Some%20random text %25%E2%80%8FOK"},
242    {L"Some%20random text %25%E2%80%AAOK", UnescapeRule::NORMAL,
243     L"Some%20random text %25%E2%80%AAOK"},
244    {L"Some%20random text %25%E2%80%ABOK", UnescapeRule::NORMAL,
245     L"Some%20random text %25%E2%80%ABOK"},
246    {L"Some%20random text %25%E2%80%AEOK", UnescapeRule::NORMAL,
247     L"Some%20random text %25%E2%80%AEOK"},
248    {L"Some%20random text %25%E2%81%A6OK", UnescapeRule::NORMAL,
249     L"Some%20random text %25%E2%81%A6OK"},
250    {L"Some%20random text %25%E2%81%A9OK", UnescapeRule::NORMAL,
251     L"Some%20random text %25%E2%81%A9OK"},
252
253    {L"Some%20random text %25%2dOK", UnescapeRule::SPACES,
254     L"Some random text %25-OK"},
255    {L"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS,
256     L"Some%20random text %-OK"},
257    {L"Some%20random text %25%2dOK",
258     UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
259     L"Some random text %-OK"},
260    {L"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, L"\xA0\xB1\xC2\xD3\xE4\xF5"},
261    {L"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, L"\xAa\xBb\xCc\xDd\xEe\xFf"},
262    // Certain URL-sensitive characters should not be unescaped unless asked.
263    {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES,
264     L"Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"},
265    {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
266     UnescapeRule::URL_SPECIAL_CHARS,
267     L"Hello%20%13%10world ## ?? == && %% ++"},
268    // We can neither escape nor unescape '@' since some websites expect it to
269    // be preserved as either '@' or "%40".
270    // See http://b/996720 and http://crbug.com/23933 .
271    {L"me@my%40example", UnescapeRule::NORMAL, L"me@my%40example"},
272    // Control characters.
273    {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS,
274     L"%01%02%03%04%05%06%07%08%09 %"},
275    {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS,
276     L"\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"},
277    {L"Hello%20%13%10%02", UnescapeRule::SPACES, L"Hello %13%10%02"},
278    {L"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS,
279     L"Hello%20\x13\x10\x02"},
280    {L"Hello\x9824\x9827", UnescapeRule::CONTROL_CHARS,
281     L"Hello\x9824\x9827"},
282  };
283
284  for (size_t i = 0; i < arraysize(unescape_cases); i++) {
285    base::string16 str(base::WideToUTF16(unescape_cases[i].input));
286    EXPECT_EQ(base::WideToUTF16(unescape_cases[i].output),
287              UnescapeURLComponent(str, unescape_cases[i].rules));
288  }
289
290  // Test the NULL character unescaping (which wouldn't work above since those
291  // are just char pointers).
292  base::string16 input(base::WideToUTF16(L"Null"));
293  input.push_back(0);  // Also have a NULL in the input.
294  input.append(base::WideToUTF16(L"%00%39Test"));
295
296  // When we're unescaping NULLs
297  base::string16 expected(base::WideToUTF16(L"Null"));
298  expected.push_back(0);
299  expected.push_back(0);
300  expected.append(base::ASCIIToUTF16("9Test"));
301  EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS));
302
303  // When we're not unescaping NULLs.
304  expected = base::WideToUTF16(L"Null");
305  expected.push_back(0);
306  expected.append(base::WideToUTF16(L"%009Test"));
307  EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL));
308}
309
310TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponent) {
311  const UnescapeAndDecodeCase unescape_cases[] = {
312    { "%",
313      "%",
314      "%",
315     L"%"},
316    { "+",
317      "+",
318      " ",
319     L"+"},
320    { "%2+",
321      "%2+",
322      "%2 ",
323     L"%2+"},
324    { "+%%%+%%%",
325      "+%%%+%%%",
326      " %%% %%%",
327     L"+%%%+%%%"},
328    { "Don't escape anything",
329      "Don't escape anything",
330      "Don't escape anything",
331     L"Don't escape anything"},
332    { "+Invalid %escape %2+",
333      "+Invalid %escape %2+",
334      " Invalid %escape %2 ",
335     L"+Invalid %escape %2+"},
336    { "Some random text %25%2dOK",
337      "Some random text %25-OK",
338      "Some random text %25-OK",
339     L"Some random text %25-OK"},
340    { "%01%02%03%04%05%06%07%08%09",
341      "%01%02%03%04%05%06%07%08%09",
342      "%01%02%03%04%05%06%07%08%09",
343     L"%01%02%03%04%05%06%07%08%09"},
344    { "%E4%BD%A0+%E5%A5%BD",
345      "\xE4\xBD\xA0+\xE5\xA5\xBD",
346      "\xE4\xBD\xA0 \xE5\xA5\xBD",
347     L"\x4f60+\x597d"},
348    { "%ED%ED",  // Invalid UTF-8.
349      "\xED\xED",
350      "\xED\xED",
351     L"%ED%ED"},  // Invalid UTF-8 -> kept unescaped.
352  };
353
354  for (size_t i = 0; i < arraysize(unescape_cases); i++) {
355    std::string unescaped = UnescapeURLComponent(unescape_cases[i].input,
356                                                 UnescapeRule::NORMAL);
357    EXPECT_EQ(std::string(unescape_cases[i].url_unescaped), unescaped);
358
359    unescaped = UnescapeURLComponent(unescape_cases[i].input,
360                                     UnescapeRule::REPLACE_PLUS_WITH_SPACE);
361    EXPECT_EQ(std::string(unescape_cases[i].query_unescaped), unescaped);
362
363    // TODO: Need to test unescape_spaces and unescape_percent.
364    base::string16 decoded = UnescapeAndDecodeUTF8URLComponent(
365        unescape_cases[i].input, UnescapeRule::NORMAL);
366    EXPECT_EQ(base::WideToUTF16(unescape_cases[i].decoded), decoded);
367  }
368}
369
370TEST(EscapeTest, AdjustOffset) {
371  const AdjustOffsetCase adjust_cases[] = {
372    {"", 0, 0},
373    {"test", 0, 0},
374    {"test", 2, 2},
375    {"test", 4, 4},
376    {"test", std::string::npos, std::string::npos},
377    {"%2dtest", 6, 4},
378    {"%2dtest", 3, 1},
379    {"%2dtest", 2, std::string::npos},
380    {"%2dtest", 1, std::string::npos},
381    {"%2dtest", 0, 0},
382    {"test%2d", 2, 2},
383    {"%E4%BD%A0+%E5%A5%BD", 9, 1},
384    {"%E4%BD%A0+%E5%A5%BD", 6, std::string::npos},
385    {"%E4%BD%A0+%E5%A5%BD", 0, 0},
386    {"%E4%BD%A0+%E5%A5%BD", 10, 2},
387    {"%E4%BD%A0+%E5%A5%BD", 19, 3},
388
389    {"hi%41test%E4%BD%A0+%E5%A5%BD", 18, 8},
390    {"hi%41test%E4%BD%A0+%E5%A5%BD", 15, std::string::npos},
391    {"hi%41test%E4%BD%A0+%E5%A5%BD", 9, 7},
392    {"hi%41test%E4%BD%A0+%E5%A5%BD", 19, 9},
393    {"hi%41test%E4%BD%A0+%E5%A5%BD", 28, 10},
394    {"hi%41test%E4%BD%A0+%E5%A5%BD", 0, 0},
395    {"hi%41test%E4%BD%A0+%E5%A5%BD", 2, 2},
396    {"hi%41test%E4%BD%A0+%E5%A5%BD", 3, std::string::npos},
397    {"hi%41test%E4%BD%A0+%E5%A5%BD", 5, 3},
398
399    {"%E4%BD%A0+%E5%A5%BDhi%41test", 9, 1},
400    {"%E4%BD%A0+%E5%A5%BDhi%41test", 6, std::string::npos},
401    {"%E4%BD%A0+%E5%A5%BDhi%41test", 0, 0},
402    {"%E4%BD%A0+%E5%A5%BDhi%41test", 10, 2},
403    {"%E4%BD%A0+%E5%A5%BDhi%41test", 19, 3},
404    {"%E4%BD%A0+%E5%A5%BDhi%41test", 21, 5},
405    {"%E4%BD%A0+%E5%A5%BDhi%41test", 22, std::string::npos},
406    {"%E4%BD%A0+%E5%A5%BDhi%41test", 24, 6},
407    {"%E4%BD%A0+%E5%A5%BDhi%41test", 28, 10},
408
409    {"%ED%B0%80+%E5%A5%BD", 6, 6},  // not convertable to UTF-8
410  };
411
412  for (size_t i = 0; i < arraysize(adjust_cases); i++) {
413    size_t offset = adjust_cases[i].input_offset;
414    base::OffsetAdjuster::Adjustments adjustments;
415    UnescapeAndDecodeUTF8URLComponentWithAdjustments(
416        adjust_cases[i].input, UnescapeRule::NORMAL, &adjustments);
417    base::OffsetAdjuster::AdjustOffset(adjustments, &offset);
418    EXPECT_EQ(adjust_cases[i].output_offset, offset)
419        << "input=" << adjust_cases[i].input
420        << " offset=" << adjust_cases[i].input_offset;
421  }
422}
423
424TEST(EscapeTest, EscapeForHTML) {
425  const EscapeForHTMLCase tests[] = {
426    { "hello", "hello" },
427    { "<hello>", "&lt;hello&gt;" },
428    { "don\'t mess with me", "don&#39;t mess with me" },
429  };
430  for (size_t i = 0; i < arraysize(tests); ++i) {
431    std::string result = EscapeForHTML(std::string(tests[i].input));
432    EXPECT_EQ(std::string(tests[i].expected_output), result);
433  }
434}
435
436TEST(EscapeTest, UnescapeForHTML) {
437  const EscapeForHTMLCase tests[] = {
438    { "", "" },
439    { "&lt;hello&gt;", "<hello>" },
440    { "don&#39;t mess with me", "don\'t mess with me" },
441    { "&lt;&gt;&amp;&quot;&#39;", "<>&\"'" },
442    { "& lt; &amp ; &; '", "& lt; &amp ; &; '" },
443    { "&amp;", "&" },
444    { "&quot;", "\"" },
445    { "&#39;", "'" },
446    { "&lt;", "<" },
447    { "&gt;", ">" },
448    { "&amp; &", "& &" },
449  };
450  for (size_t i = 0; i < arraysize(tests); ++i) {
451    base::string16 result = UnescapeForHTML(base::ASCIIToUTF16(tests[i].input));
452    EXPECT_EQ(base::ASCIIToUTF16(tests[i].expected_output), result);
453  }
454}
455
456
457}  // namespace
458}  // namespace net
459