1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "net/base/net_util.h"
6
7#include <string.h>
8
9#include <vector>
10
11#include "base/format_macros.h"
12#include "base/strings/string_number_conversions.h"
13#include "base/strings/utf_string_conversions.h"
14#include "base/time/time.h"
15#include "testing/gtest/include/gtest/gtest.h"
16#include "url/gurl.h"
17
18using base::ASCIIToUTF16;
19using base::WideToUTF16;
20
21namespace net {
22
23namespace {
24
25static const size_t kNpos = base::string16::npos;
26
// Language lists that every IDN test case is run against.
//
// The order is significant: IDNTestCase::unicode_allowed is indexed in
// parallel with this array, and the IDNToUnicodeFast / IDNToUnicodeSlow tests
// select entries by position (3 = "ja", 17 = "zh-TW,en", 18 = "ko,ja").
// Append new lists at the end and extend every table row accordingly.
//
// Both the strings and the pointers are immutable.
const char* const kLanguages[] = {
  "",      "en",    "zh-CN",    "ja",    "ko",
  "he",    "ar",    "ru",       "el",    "fr",
  "de",    "pt",    "sv",       "th",    "hi",
  "de,en", "el,en", "zh-TW,en", "ko,ja", "he,ru,en",
  "zh,ru,en"
};
34
35struct IDNTestCase {
36  const char* input;
37  const wchar_t* unicode_output;
38  const bool unicode_allowed[arraysize(kLanguages)];
39};
40
41// TODO(jungshik) This is just a random sample of languages and is far
42// from exhaustive.  We may have to generate all the combinations
43// of languages (powerset of a set of all the languages).
44const IDNTestCase idn_cases[] = {
45  // No IDN
46  {"www.google.com", L"www.google.com",
47   {true,  true,  true,  true,  true,
48    true,  true,  true,  true,  true,
49    true,  true,  true,  true,  true,
50    true,  true,  true,  true,  true,
51    true}},
52  {"www.google.com.", L"www.google.com.",
53   {true,  true,  true,  true,  true,
54    true,  true,  true,  true,  true,
55    true,  true,  true,  true,  true,
56    true,  true,  true,  true,  true,
57    true}},
58  {".", L".",
59   {true,  true,  true,  true,  true,
60    true,  true,  true,  true,  true,
61    true,  true,  true,  true,  true,
62    true,  true,  true,  true,  true,
63    true}},
64  {"", L"",
65   {true,  true,  true,  true,  true,
66    true,  true,  true,  true,  true,
67    true,  true,  true,  true,  true,
68    true,  true,  true,  true,  true,
69    true}},
70  // IDN
71  // Hanzi (Traditional Chinese)
72  {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn",
73   {true,  false, true,  true,  false,
74    false, false, false, false, false,
75    false, false, false, false, false,
76    false, false, true,  true,  false,
77    true}},
78  // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh)
79  {"xn--cy2a840a.com", L"\x89c6\x9891.com",
80   {true,  false, true,  false,  false,
81    false, false, false, false, false,
82    false, false, false, false, false,
83    false, false, false, false,  false,
84    true}},
85  // Hanzi + '123'
86  {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com",
87   {true,  false, true,  true,  false,
88    false, false, false, false, false,
89    false, false, false, false, false,
90    false, false, true,  true,  false,
91    true}},
92  // Hanzi + Latin : U+56FD is simplified and is regarded
93  // as not supported in zh-TW.
94  {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com",
95   {false, false, true,  true,  false,
96    false, false, false, false, false,
97    false, false, false, false, false,
98    false, false, false, true,  false,
99    true}},
100  // Kanji + Kana (Japanese)
101  {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp",
102   {true,  false, false, true,  false,
103    false, false, false, false, false,
104    false, false, false, false, false,
105    false, false, false, true,  false,
106    false}},
107  // Katakana including U+30FC
108  {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp",
109   {true, false, false, true,  false,
110    false, false, false, false, false,
111    false, false, false, false, false,
112    false, false, false, true, false,
113    }},
114  {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp",
115   {true, false, false, true,  false,
116    false, false, false, false, false,
117    false, false, false, false, false,
118    false, false, false, true, false,
119    }},
120  // Katakana + Latin (Japanese)
121  // TODO(jungshik): Change 'false' in the first element to 'true'
122  // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead
123  // of our IsIDNComponentInSingleScript().
124  {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp",
125   {false, false, false, true,  false,
126    false, false, false, false, false,
127    false, false, false, false, false,
128    false, false, false, true, false,
129    }},
130  {"xn--3bkxe.jp", L"\x30c8\x309a.jp",
131   {false, false, false, true,  false,
132    false, false, false, false, false,
133    false, false, false, false, false,
134    false, false, false, true, false,
135    }},
136  // Hangul (Korean)
137  {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr",
138   {true,  false, false, false, true,
139    false, false, false, false, false,
140    false, false, false, false, false,
141    false, false, false, true,  false,
142    false}},
143  // b<u-umlaut>cher (German)
144  {"xn--bcher-kva.de", L"b\x00fc" L"cher.de",
145   {true,  false, false, false, false,
146    false, false, false, false, true,
147    true,  false,  false, false, false,
148    true,  false, false, false, false,
149    false}},
150  // a with diaeresis
151  {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se",
152   {true,  false, false, false, false,
153    false, false, false, false, false,
154    true,  false, true, false, false,
155    true,  false, false, false, false,
156    false}},
157  // c-cedilla (French)
158  {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr",
159   {true,  false, false, false, false,
160    false, false, false, false, true,
161    false, true,  false, false, false,
162    false, false, false, false, false,
163    false}},
164  // caf'e with acute accent' (French)
165  {"xn--caf-dma.fr", L"caf\x00e9.fr",
166   {true,  false, false, false, false,
167    false, false, false, false, true,
168    false, true,  true,  false, false,
169    false, false, false, false, false,
170    false}},
171  // c-cedillla and a with tilde (Portuguese)
172  {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br",
173   {true,  false, false, false, false,
174    false, false, false, false, false,
175    false, true,  false, false, false,
176    false, false, false, false, false,
177    false}},
178  // s with caron
179  {"xn--achy-f6a.com", L"\x0161" L"achy.com",
180   {true,  false, false, false, false,
181    false, false, false, false, false,
182    false, false, false, false, false,
183    false, false, false, false, false,
184    false}},
185  // TODO(jungshik) : Add examples with Cyrillic letters
186  // only used in some languages written in Cyrillic.
187  // Eutopia (Greek)
188  {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr",
189   {true,  false, false, false, false,
190    false, false, false, true,  false,
191    false, false, false, false, false,
192    false, true,  false, false, false,
193    false}},
194  // Eutopia + 123 (Greek)
195  {"xn---123-pldm0haj2bk.gr",
196   L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr",
197   {true,  false, false, false, false,
198    false, false, false, true,  false,
199    false, false, false, false, false,
200    false, true,  false, false, false,
201    false}},
202  // Cyrillic (Russian)
203  {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru",
204   {true,  false, false, false, false,
205    false, false, true,  false, false,
206    false, false, false, false, false,
207    false, false, false, false, true,
208    true}},
209  // Cyrillic + 123 (Russian)
210  {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru",
211   {true,  false, false, false, false,
212    false, false, true,  false, false,
213    false, false, false, false, false,
214    false, false, false, false, true,
215    true}},
216  // Arabic
217  {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar",
218   {true,  false, false, false, false,
219    false, true,  false, false, false,
220    false, false, false, false, false,
221    false, false, false, false, false,
222    false}},
223  // Hebrew
224  {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he",
225   {true,  false, false, false, false,
226    true,  false, false, false, false,
227    false, false, false, false, false,
228    false, false, false, false, true,
229    false}},
230  // Thai
231  {"xn--12c2cc4ag3b4ccu.th",
232   L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th",
233   {true,  false, false, false, false,
234    false, false, false, false, false,
235    false, false, false, true,  false,
236    false, false, false, false, false,
237    false}},
238  // Devangari (Hindi)
239  {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in",
240   {true,  false, false, false, false,
241    false, false, false, false, false,
242    false, false, false, false, true,
243    false, false, false, false, false,
244    false}},
245  // Invalid IDN
246  {"xn--hello?world.com", NULL,
247   {false, false, false, false, false,
248    false, false, false, false, false,
249    false, false, false, false, false,
250    false, false, false, false, false,
251    false}},
252  // Unsafe IDNs
253  // "payp<alpha>l.com"
254  {"www.xn--paypl-g9d.com", L"payp\x03b1l.com",
255   {false, false, false, false, false,
256    false, false, false, false, false,
257    false, false, false, false, false,
258    false, false, false, false, false,
259    false}},
260  // google.gr with Greek omicron and epsilon
261  {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr",
262   {false, false, false, false, false,
263    false, false, false, false, false,
264    false, false, false, false, false,
265    false, false, false, false, false,
266    false}},
267  // google.ru with Cyrillic o
268  {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru",
269   {false, false, false, false, false,
270    false, false, false, false, false,
271    false, false, false, false, false,
272    false, false, false, false, false,
273    false}},
274  // h<e with acute>llo<China in Han>.cn
275  {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn",
276   {false, false, false, false, false,
277    false, false, false, false, false,
278    false, false, false, false, false,
279    false, false, false, false, false,
280    false}},
281  // <Greek rho><Cyrillic a><Cyrillic u>.ru
282  {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru",
283   {false, false, false, false, false,
284    false, false, false, false, false,
285    false, false, false, false, false,
286    false, false, false, false, false,
287    false}},
288  // One that's really long that will force a buffer realloc
289  {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
290       "aaaaaaa",
291   L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
292       L"aaaaaaaa",
293   {true,  true,  true,  true,  true,
294    true,  true,  true,  true,  true,
295    true,  true,  true,  true,  true,
296    true,  true,  true,  true,  true,
297    true}},
298  // Test cases for characters we blacklisted although allowed in IDN.
299  // Embedded spaces will be turned to %20 in the display.
300  // TODO(jungshik): We need to have more cases. This is a typical
301  // data-driven trap. The following test cases need to be separated
302  // and tested only for a couple of languages.
303  {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr",
304    {false, false, false, false, false,
305     false, false, false, false, false,
306     false, false, false, false, false,
307     false, false, false, false, false,
308     false}},
309  {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com",
310    {false, false, false, false, false,
311     false, false, false, false, false,
312     false, false, false, false, false,
313     false, false, false, false, false,
314  }},
315  {"google.xn--comabc-k8d", L"google.com\x0338" L"abc",
316    {false, false, false, false, false,
317     false, false, false, false, false,
318     false, false, false, false, false,
319     false, false, false, false, false,
320  }},
321  {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp",
322    {false, false, false, false, false,
323     false, false, false, false, false,
324     false, false, false, false, false,
325     false, false, false, false, false,
326  }},
327  {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp",
328    {false, false, false, false, false,
329     false, false, false, false, false,
330     false, false, false, false, false,
331     false, false, false, false, false,
332  }},
333#if 0
334  // These two cases are special. We need a separate test.
335  // U+3000 and U+3002 are normalized to ASCII space and dot.
336  {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn",
337    {false, false, true,  false, false,
338     false, false, false, false, false,
339     false, false, false, false, false,
340     false, false, true,  false, false,
341     true}},
342  {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn",
343    {false, false, true,  false, false,
344     false, false, false, false, false,
345     false, false, false, false, false,
346     false, false, true,  false, false,
347     true}},
348#endif
349};
350
// A pair of offsets.  NOTE(review): not used in the visible part of this
// file; presumably an offset into an input string together with the offset
// expected after adjustment -- confirm against the tests that use it.
struct AdjustOffsetCase {
  size_t input_offset;
  size_t output_offset;
};
355
356struct UrlTestData {
357  const char* description;
358  const char* input;
359  const char* languages;
360  FormatUrlTypes format_types;
361  UnescapeRule::Type escape_rules;
362  const wchar_t* output;  // Use |wchar_t| to handle Unicode constants easily.
363  size_t prefix_len;
364};
365
366// A helper for IDN*{Fast,Slow}.
367// Append "::<language list>" to |expected| and |actual| to make it
368// easy to tell which sub-case fails without debugging.
369void AppendLanguagesToOutputs(const char* languages,
370                              base::string16* expected,
371                              base::string16* actual) {
372  base::string16 to_append = ASCIIToUTF16("::") + ASCIIToUTF16(languages);
373  expected->append(to_append);
374  actual->append(to_append);
375}
376
377// A pair of helpers for the FormatUrlWithOffsets() test.
378void VerboseExpect(size_t expected,
379                   size_t actual,
380                   const std::string& original_url,
381                   size_t position,
382                   const base::string16& formatted_url) {
383  EXPECT_EQ(expected, actual) << "Original URL: " << original_url
384      << " (at char " << position << ")\nFormatted URL: " << formatted_url;
385}
386
387void CheckAdjustedOffsets(const std::string& url_string,
388                          const std::string& languages,
389                          FormatUrlTypes format_types,
390                          UnescapeRule::Type unescape_rules,
391                          const size_t* output_offsets) {
392  GURL url(url_string);
393  size_t url_length = url_string.length();
394  std::vector<size_t> offsets;
395  for (size_t i = 0; i <= url_length + 1; ++i)
396    offsets.push_back(i);
397  offsets.push_back(500000);  // Something larger than any input length.
398  offsets.push_back(std::string::npos);
399  base::string16 formatted_url = FormatUrlWithOffsets(url, languages,
400      format_types, unescape_rules, NULL, NULL, &offsets);
401  for (size_t i = 0; i < url_length; ++i)
402    VerboseExpect(output_offsets[i], offsets[i], url_string, i, formatted_url);
403  VerboseExpect(formatted_url.length(), offsets[url_length], url_string,
404                url_length, formatted_url);
405  VerboseExpect(base::string16::npos, offsets[url_length + 1], url_string,
406                500000, formatted_url);
407  VerboseExpect(base::string16::npos, offsets[url_length + 2], url_string,
408                std::string::npos, formatted_url);
409}
410
411}  // anonymous namespace
412
413TEST(NetUtilTest, IDNToUnicodeFast) {
414  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_cases); i++) {
415    for (size_t j = 0; j < arraysize(kLanguages); j++) {
416      // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow
417      if (j == 3 || j == 17 || j == 18)
418        continue;
419      base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j]));
420      base::string16 expected(idn_cases[i].unicode_allowed[j] ?
421          WideToUTF16(idn_cases[i].unicode_output) :
422          ASCIIToUTF16(idn_cases[i].input));
423      AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
424      EXPECT_EQ(expected, output);
425    }
426  }
427}
428
429TEST(NetUtilTest, IDNToUnicodeSlow) {
430  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_cases); i++) {
431    for (size_t j = 0; j < arraysize(kLanguages); j++) {
432      // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast
433      if (!(j == 3 || j == 17 || j == 18))
434        continue;
435      base::string16 output(IDNToUnicode(idn_cases[i].input, kLanguages[j]));
436      base::string16 expected(idn_cases[i].unicode_allowed[j] ?
437          WideToUTF16(idn_cases[i].unicode_output) :
438          ASCIIToUTF16(idn_cases[i].input));
439      AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
440      EXPECT_EQ(expected, output);
441    }
442  }
443}
444
445TEST(NetUtilTest, StripWWW) {
446  EXPECT_EQ(base::string16(), StripWWW(base::string16()));
447  EXPECT_EQ(base::string16(), StripWWW(ASCIIToUTF16("www.")));
448  EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("www.blah")));
449  EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("blah")));
450}
451
// This is currently a Windows-specific function.
453#if defined(OS_WIN)
454namespace {
455
456struct GetDirectoryListingEntryCase {
457  const wchar_t* name;
458  const char* raw_bytes;
459  bool is_dir;
460  int64 filesize;
461  base::Time time;
462  const char* expected;
463};
464
465}  // namespace
466
467TEST(NetUtilTest, GetDirectoryListingEntry) {
468  const GetDirectoryListingEntryCase test_cases[] = {
469    {L"Foo",
470     "",
471     false,
472     10000,
473     base::Time(),
474     "<script>addRow(\"Foo\",\"Foo\",0,\"9.8 kB\",\"\");</script>\n"},
475    {L"quo\"tes",
476     "",
477     false,
478     10000,
479     base::Time(),
480     "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>"
481         "\n"},
482    {L"quo\"tes",
483     "quo\"tes",
484     false,
485     10000,
486     base::Time(),
487     "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>"
488         "\n"},
489    // U+D55C0 U+AE00. raw_bytes is empty (either a local file with
490    // UTF-8/UTF-16 encoding or a remote file on an ftp server using UTF-8
491    {L"\xD55C\xAE00.txt",
492     "",
493     false,
494     10000,
495     base::Time(),
496     "<script>addRow(\"\xED\x95\x9C\xEA\xB8\x80.txt\","
497         "\"%ED%95%9C%EA%B8%80.txt\",0,\"9.8 kB\",\"\");</script>\n"},
498    // U+D55C0 U+AE00. raw_bytes is the corresponding EUC-KR sequence:
499    // a local or remote file in EUC-KR.
500    {L"\xD55C\xAE00.txt",
501     "\xC7\xD1\xB1\xDB.txt",
502     false,
503     10000,
504     base::Time(),
505     "<script>addRow(\"\xED\x95\x9C\xEA\xB8\x80.txt\",\"%C7%D1%B1%DB.txt\""
506         ",0,\"9.8 kB\",\"\");</script>\n"},
507  };
508
509  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
510    const std::string results = GetDirectoryListingEntry(
511        WideToUTF16(test_cases[i].name),
512        test_cases[i].raw_bytes,
513        test_cases[i].is_dir,
514        test_cases[i].filesize,
515        test_cases[i].time);
516    EXPECT_EQ(test_cases[i].expected, results);
517  }
518}
519
520#endif
521
522TEST(NetUtilTest, FormatUrl) {
523  FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword;
524  const UrlTestData tests[] = {
525    {"Empty URL", "", "", default_format_type, UnescapeRule::NORMAL, L"", 0},
526
527    {"Simple URL",
528     "http://www.google.com/", "", default_format_type, UnescapeRule::NORMAL,
529     L"http://www.google.com/", 7},
530
531    {"With a port number and a reference",
532     "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type,
533     UnescapeRule::NORMAL,
534     L"http://www.google.com:8080/#\x30B0", 7},
535
536    // -------- IDN tests --------
537    {"Japanese IDN with ja",
538     "http://xn--l8jvb1ey91xtjb.jp", "ja", default_format_type,
539     UnescapeRule::NORMAL, L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
540
541    {"Japanese IDN with en",
542     "http://xn--l8jvb1ey91xtjb.jp", "en", default_format_type,
543     UnescapeRule::NORMAL, L"http://xn--l8jvb1ey91xtjb.jp/", 7},
544
545    {"Japanese IDN without any languages",
546     "http://xn--l8jvb1ey91xtjb.jp", "", default_format_type,
547     UnescapeRule::NORMAL,
548     // Single script is safe for empty languages.
549     L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
550
551    {"mailto: with Japanese IDN",
552     "mailto:foo@xn--l8jvb1ey91xtjb.jp", "ja", default_format_type,
553     UnescapeRule::NORMAL,
554     // GURL doesn't assume an email address's domain part as a host name.
555     L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7},
556
557    {"file: with Japanese IDN",
558     "file://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type,
559     UnescapeRule::NORMAL,
560     L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7},
561
562    {"ftp: with Japanese IDN",
563     "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type,
564     UnescapeRule::NORMAL,
565     L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6},
566
567    // -------- omit_username_password flag tests --------
568    {"With username and password, omit_username_password=false",
569     "http://user:passwd@example.com/foo", "",
570     kFormatUrlOmitNothing, UnescapeRule::NORMAL,
571     L"http://user:passwd@example.com/foo", 19},
572
573    {"With username and password, omit_username_password=true",
574     "http://user:passwd@example.com/foo", "", default_format_type,
575     UnescapeRule::NORMAL, L"http://example.com/foo", 7},
576
577    {"With username and no password",
578     "http://user@example.com/foo", "", default_format_type,
579     UnescapeRule::NORMAL, L"http://example.com/foo", 7},
580
581    {"Just '@' without username and password",
582     "http://@example.com/foo", "", default_format_type, UnescapeRule::NORMAL,
583     L"http://example.com/foo", 7},
584
585    // GURL doesn't think local-part of an email address is username for URL.
586    {"mailto:, omit_username_password=true",
587     "mailto:foo@example.com", "", default_format_type, UnescapeRule::NORMAL,
588     L"mailto:foo@example.com", 7},
589
590    // -------- unescape flag tests --------
591    {"Do not unescape",
592     "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
593     "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
594     "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type,
595     UnescapeRule::NONE,
596     // GURL parses %-encoded hostnames into Punycode.
597     L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
598     L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 7},
599
600    {"Unescape normally",
601     "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
602     "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
603     "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type,
604     UnescapeRule::NORMAL,
605     L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB"
606     L"?q=\x30B0\x30FC\x30B0\x30EB", 7},
607
608    {"Unescape normally with BiDi control character",
609     "http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", "en", default_format_type,
610     UnescapeRule::NORMAL, L"http://example.com/%E2%80%AEabc?q=%E2%80%8Fxy", 7},
611
612    {"Unescape normally including unescape spaces",
613     "http://www.google.com/search?q=Hello%20World", "en", default_format_type,
614     UnescapeRule::SPACES, L"http://www.google.com/search?q=Hello World", 7},
615
616    /*
617    {"unescape=true with some special characters",
618    "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "",
619    kFormatUrlOmitNothing, UnescapeRule::NORMAL,
620    L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25},
621    */
622    // Disabled: the resultant URL becomes "...user%253A:%2540passwd...".
623
624    // -------- omit http: --------
625    {"omit http with user name",
626     "http://user@example.com/foo", "", kFormatUrlOmitAll,
627     UnescapeRule::NORMAL, L"example.com/foo", 0},
628
629    {"omit http",
630     "http://www.google.com/", "en", kFormatUrlOmitHTTP,
631     UnescapeRule::NORMAL, L"www.google.com/",
632     0},
633
634    {"omit http with https",
635     "https://www.google.com/", "en", kFormatUrlOmitHTTP,
636     UnescapeRule::NORMAL, L"https://www.google.com/",
637     8},
638
639    {"omit http starts with ftp.",
640     "http://ftp.google.com/", "en", kFormatUrlOmitHTTP,
641     UnescapeRule::NORMAL, L"http://ftp.google.com/",
642     7},
643
644    // -------- omit trailing slash on bare hostname --------
645    {"omit slash when it's the entire path",
646     "http://www.google.com/", "en",
647     kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
648     L"http://www.google.com", 7},
649    {"omit slash when there's a ref",
650     "http://www.google.com/#ref", "en",
651     kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
652     L"http://www.google.com/#ref", 7},
653    {"omit slash when there's a query",
654     "http://www.google.com/?", "en",
655     kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
656     L"http://www.google.com/?", 7},
657    {"omit slash when it's not the entire path",
658     "http://www.google.com/foo", "en",
659     kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
660     L"http://www.google.com/foo", 7},
661    {"omit slash for nonstandard URLs",
662     "data:/", "en", kFormatUrlOmitTrailingSlashOnBareHostname,
663     UnescapeRule::NORMAL, L"data:/", 5},
664    {"omit slash for file URLs",
665     "file:///", "en", kFormatUrlOmitTrailingSlashOnBareHostname,
666     UnescapeRule::NORMAL, L"file:///", 7},
667
668    // -------- view-source: --------
669    {"view-source",
670     "view-source:http://xn--qcka1pmc.jp/", "ja", default_format_type,
671     UnescapeRule::NORMAL, L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/",
672     19},
673
674    {"view-source of view-source",
675     "view-source:view-source:http://xn--qcka1pmc.jp/", "ja",
676     default_format_type, UnescapeRule::NORMAL,
677     L"view-source:view-source:http://xn--qcka1pmc.jp/", 12},
678
679    // view-source should omit http and trailing slash where non-view-source
680    // would.
681    {"view-source omit http",
682     "view-source:http://a.b/c", "en", kFormatUrlOmitAll,
683     UnescapeRule::NORMAL, L"view-source:a.b/c",
684     12},
685    {"view-source omit http starts with ftp.",
686     "view-source:http://ftp.b/c", "en", kFormatUrlOmitAll,
687     UnescapeRule::NORMAL, L"view-source:http://ftp.b/c",
688     19},
689    {"view-source omit slash when it's the entire path",
690     "view-source:http://a.b/", "en", kFormatUrlOmitAll,
691     UnescapeRule::NORMAL, L"view-source:a.b",
692     12},
693  };
694
695  for (size_t i = 0; i < arraysize(tests); ++i) {
696    size_t prefix_len;
697    base::string16 formatted = FormatUrl(
698        GURL(tests[i].input), tests[i].languages, tests[i].format_types,
699        tests[i].escape_rules, NULL, &prefix_len, NULL);
700    EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description;
701    EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description;
702  }
703}
704
705TEST(NetUtilTest, FormatUrlParsed) {
706  // No unescape case.
707  url::Parsed parsed;
708  base::string16 formatted = FormatUrl(
709      GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
710           "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
711      "ja", kFormatUrlOmitNothing, UnescapeRule::NONE, &parsed, NULL,
712      NULL);
713  EXPECT_EQ(WideToUTF16(
714      L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
715      L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted);
716  EXPECT_EQ(WideToUTF16(L"%E3%82%B0"),
717      formatted.substr(parsed.username.begin, parsed.username.len));
718  EXPECT_EQ(WideToUTF16(L"%E3%83%BC"),
719      formatted.substr(parsed.password.begin, parsed.password.len));
720  EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
721      formatted.substr(parsed.host.begin, parsed.host.len));
722  EXPECT_EQ(WideToUTF16(L"8080"),
723      formatted.substr(parsed.port.begin, parsed.port.len));
724  EXPECT_EQ(WideToUTF16(L"/%E3%82%B0/"),
725      formatted.substr(parsed.path.begin, parsed.path.len));
726  EXPECT_EQ(WideToUTF16(L"q=%E3%82%B0"),
727      formatted.substr(parsed.query.begin, parsed.query.len));
728  EXPECT_EQ(WideToUTF16(L"\x30B0"),
729      formatted.substr(parsed.ref.begin, parsed.ref.len));
730
731  // Unescape case.
732  formatted = FormatUrl(
733      GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
734           "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
735      "ja", kFormatUrlOmitNothing, UnescapeRule::NORMAL, &parsed, NULL,
736      NULL);
737  EXPECT_EQ(WideToUTF16(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
738      L"/\x30B0/?q=\x30B0#\x30B0"), formatted);
739  EXPECT_EQ(WideToUTF16(L"\x30B0"),
740      formatted.substr(parsed.username.begin, parsed.username.len));
741  EXPECT_EQ(WideToUTF16(L"\x30FC"),
742      formatted.substr(parsed.password.begin, parsed.password.len));
743  EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
744      formatted.substr(parsed.host.begin, parsed.host.len));
745  EXPECT_EQ(WideToUTF16(L"8080"),
746      formatted.substr(parsed.port.begin, parsed.port.len));
747  EXPECT_EQ(WideToUTF16(L"/\x30B0/"),
748      formatted.substr(parsed.path.begin, parsed.path.len));
749  EXPECT_EQ(WideToUTF16(L"q=\x30B0"),
750      formatted.substr(parsed.query.begin, parsed.query.len));
751  EXPECT_EQ(WideToUTF16(L"\x30B0"),
752      formatted.substr(parsed.ref.begin, parsed.ref.len));
753
754  // Omit_username_password + unescape case.
755  formatted = FormatUrl(
756      GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
757           "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
758      "ja", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, &parsed,
759      NULL, NULL);
760  EXPECT_EQ(WideToUTF16(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080"
761      L"/\x30B0/?q=\x30B0#\x30B0"), formatted);
762  EXPECT_FALSE(parsed.username.is_valid());
763  EXPECT_FALSE(parsed.password.is_valid());
764  EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
765      formatted.substr(parsed.host.begin, parsed.host.len));
766  EXPECT_EQ(WideToUTF16(L"8080"),
767      formatted.substr(parsed.port.begin, parsed.port.len));
768  EXPECT_EQ(WideToUTF16(L"/\x30B0/"),
769      formatted.substr(parsed.path.begin, parsed.path.len));
770  EXPECT_EQ(WideToUTF16(L"q=\x30B0"),
771      formatted.substr(parsed.query.begin, parsed.query.len));
772  EXPECT_EQ(WideToUTF16(L"\x30B0"),
773      formatted.substr(parsed.ref.begin, parsed.ref.len));
774
775  // View-source case.
776  formatted =
777      FormatUrl(GURL("view-source:http://user:passwd@host:81/path?query#ref"),
778                std::string(),
779                kFormatUrlOmitUsernamePassword,
780                UnescapeRule::NORMAL,
781                &parsed,
782                NULL,
783                NULL);
784  EXPECT_EQ(WideToUTF16(L"view-source:http://host:81/path?query#ref"),
785      formatted);
786  EXPECT_EQ(WideToUTF16(L"view-source:http"),
787      formatted.substr(parsed.scheme.begin, parsed.scheme.len));
788  EXPECT_FALSE(parsed.username.is_valid());
789  EXPECT_FALSE(parsed.password.is_valid());
790  EXPECT_EQ(WideToUTF16(L"host"),
791      formatted.substr(parsed.host.begin, parsed.host.len));
792  EXPECT_EQ(WideToUTF16(L"81"),
793      formatted.substr(parsed.port.begin, parsed.port.len));
794  EXPECT_EQ(WideToUTF16(L"/path"),
795      formatted.substr(parsed.path.begin, parsed.path.len));
796  EXPECT_EQ(WideToUTF16(L"query"),
797      formatted.substr(parsed.query.begin, parsed.query.len));
798  EXPECT_EQ(WideToUTF16(L"ref"),
799      formatted.substr(parsed.ref.begin, parsed.ref.len));
800
801  // omit http case.
802  formatted = FormatUrl(GURL("http://host:8000/a?b=c#d"),
803                        std::string(),
804                        kFormatUrlOmitHTTP,
805                        UnescapeRule::NORMAL,
806                        &parsed,
807                        NULL,
808                        NULL);
809  EXPECT_EQ(WideToUTF16(L"host:8000/a?b=c#d"), formatted);
810  EXPECT_FALSE(parsed.scheme.is_valid());
811  EXPECT_FALSE(parsed.username.is_valid());
812  EXPECT_FALSE(parsed.password.is_valid());
813  EXPECT_EQ(WideToUTF16(L"host"),
814      formatted.substr(parsed.host.begin, parsed.host.len));
815  EXPECT_EQ(WideToUTF16(L"8000"),
816      formatted.substr(parsed.port.begin, parsed.port.len));
817  EXPECT_EQ(WideToUTF16(L"/a"),
818      formatted.substr(parsed.path.begin, parsed.path.len));
819  EXPECT_EQ(WideToUTF16(L"b=c"),
820      formatted.substr(parsed.query.begin, parsed.query.len));
821  EXPECT_EQ(WideToUTF16(L"d"),
822      formatted.substr(parsed.ref.begin, parsed.ref.len));
823
824  // omit http starts with ftp case.
825  formatted = FormatUrl(GURL("http://ftp.host:8000/a?b=c#d"),
826                        std::string(),
827                        kFormatUrlOmitHTTP,
828                        UnescapeRule::NORMAL,
829                        &parsed,
830                        NULL,
831                        NULL);
832  EXPECT_EQ(WideToUTF16(L"http://ftp.host:8000/a?b=c#d"), formatted);
833  EXPECT_TRUE(parsed.scheme.is_valid());
834  EXPECT_FALSE(parsed.username.is_valid());
835  EXPECT_FALSE(parsed.password.is_valid());
836  EXPECT_EQ(WideToUTF16(L"http"),
837      formatted.substr(parsed.scheme.begin, parsed.scheme.len));
838  EXPECT_EQ(WideToUTF16(L"ftp.host"),
839      formatted.substr(parsed.host.begin, parsed.host.len));
840  EXPECT_EQ(WideToUTF16(L"8000"),
841      formatted.substr(parsed.port.begin, parsed.port.len));
842  EXPECT_EQ(WideToUTF16(L"/a"),
843      formatted.substr(parsed.path.begin, parsed.path.len));
844  EXPECT_EQ(WideToUTF16(L"b=c"),
845      formatted.substr(parsed.query.begin, parsed.query.len));
846  EXPECT_EQ(WideToUTF16(L"d"),
847      formatted.substr(parsed.ref.begin, parsed.ref.len));
848
849  // omit http starts with 'f' case.
850  formatted = FormatUrl(GURL("http://f/"),
851                        std::string(),
852                        kFormatUrlOmitHTTP,
853                        UnescapeRule::NORMAL,
854                        &parsed,
855                        NULL,
856                        NULL);
857  EXPECT_EQ(WideToUTF16(L"f/"), formatted);
858  EXPECT_FALSE(parsed.scheme.is_valid());
859  EXPECT_FALSE(parsed.username.is_valid());
860  EXPECT_FALSE(parsed.password.is_valid());
861  EXPECT_FALSE(parsed.port.is_valid());
862  EXPECT_TRUE(parsed.path.is_valid());
863  EXPECT_FALSE(parsed.query.is_valid());
864  EXPECT_FALSE(parsed.ref.is_valid());
865  EXPECT_EQ(WideToUTF16(L"f"),
866      formatted.substr(parsed.host.begin, parsed.host.len));
867  EXPECT_EQ(WideToUTF16(L"/"),
868      formatted.substr(parsed.path.begin, parsed.path.len));
869}
870
871// Make sure that calling FormatUrl on a GURL and then converting back to a GURL
872// results in the original GURL, for each ASCII character in the path.
873TEST(NetUtilTest, FormatUrlRoundTripPathASCII) {
874  for (unsigned char test_char = 32; test_char < 128; ++test_char) {
875    GURL url(std::string("http://www.google.com/") +
876             static_cast<char>(test_char));
877    size_t prefix_len;
878    base::string16 formatted = FormatUrl(url,
879                                         std::string(),
880                                         kFormatUrlOmitUsernamePassword,
881                                         UnescapeRule::NORMAL,
882                                         NULL,
883                                         &prefix_len,
884                                         NULL);
885    EXPECT_EQ(url.spec(), GURL(formatted).spec());
886  }
887}
888
889// Make sure that calling FormatUrl on a GURL and then converting back to a GURL
890// results in the original GURL, for each escaped ASCII character in the path.
891TEST(NetUtilTest, FormatUrlRoundTripPathEscaped) {
892  for (unsigned char test_char = 32; test_char < 128; ++test_char) {
893    std::string original_url("http://www.google.com/");
894    original_url.push_back('%');
895    original_url.append(base::HexEncode(&test_char, 1));
896
897    GURL url(original_url);
898    size_t prefix_len;
899    base::string16 formatted = FormatUrl(url,
900                                         std::string(),
901                                         kFormatUrlOmitUsernamePassword,
902                                         UnescapeRule::NORMAL,
903                                         NULL,
904                                         &prefix_len,
905                                         NULL);
906    EXPECT_EQ(url.spec(), GURL(formatted).spec());
907  }
908}
909
910// Make sure that calling FormatUrl on a GURL and then converting back to a GURL
911// results in the original GURL, for each ASCII character in the query.
912TEST(NetUtilTest, FormatUrlRoundTripQueryASCII) {
913  for (unsigned char test_char = 32; test_char < 128; ++test_char) {
914    GURL url(std::string("http://www.google.com/?") +
915             static_cast<char>(test_char));
916    size_t prefix_len;
917    base::string16 formatted = FormatUrl(url,
918                                         std::string(),
919                                         kFormatUrlOmitUsernamePassword,
920                                         UnescapeRule::NORMAL,
921                                         NULL,
922                                         &prefix_len,
923                                         NULL);
924    EXPECT_EQ(url.spec(), GURL(formatted).spec());
925  }
926}
927
928// Make sure that calling FormatUrl on a GURL and then converting back to a GURL
929// only results in a different GURL for certain characters.
930TEST(NetUtilTest, FormatUrlRoundTripQueryEscaped) {
931  // A full list of characters which FormatURL should unescape and GURL should
932  // not escape again, when they appear in a query string.
933  const char* kUnescapedCharacters =
934      "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_~";
935  for (unsigned char test_char = 0; test_char < 128; ++test_char) {
936    std::string original_url("http://www.google.com/?");
937    original_url.push_back('%');
938    original_url.append(base::HexEncode(&test_char, 1));
939
940    GURL url(original_url);
941    size_t prefix_len;
942    base::string16 formatted = FormatUrl(url,
943                                         std::string(),
944                                         kFormatUrlOmitUsernamePassword,
945                                         UnescapeRule::NORMAL,
946                                         NULL,
947                                         &prefix_len,
948                                         NULL);
949
950    if (test_char &&
951        strchr(kUnescapedCharacters, static_cast<char>(test_char))) {
952      EXPECT_NE(url.spec(), GURL(formatted).spec());
953    } else {
954      EXPECT_EQ(url.spec(), GURL(formatted).spec());
955    }
956  }
957}
958
959TEST(NetUtilTest, FormatUrlWithOffsets) {
960  CheckAdjustedOffsets(std::string(), "en", kFormatUrlOmitNothing,
961                       UnescapeRule::NORMAL, NULL);
962
963  const size_t basic_offsets[] = {
964    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
965    21, 22, 23, 24, 25
966  };
967  CheckAdjustedOffsets("http://www.google.com/foo/", "en",
968                       kFormatUrlOmitNothing, UnescapeRule::NORMAL,
969                       basic_offsets);
970
971  const size_t omit_auth_offsets_1[] = {
972    0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7,
973    8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
974  };
975  CheckAdjustedOffsets("http://foo:bar@www.google.com/", "en",
976                       kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
977                       omit_auth_offsets_1);
978
979  const size_t omit_auth_offsets_2[] = {
980    0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, 7, 8, 9, 10, 11, 12, 13, 14,
981    15, 16, 17, 18, 19, 20, 21
982  };
983  CheckAdjustedOffsets("http://foo@www.google.com/", "en",
984                       kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
985                       omit_auth_offsets_2);
986
987  const size_t dont_omit_auth_offsets[] = {
988    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
989    kNpos, kNpos, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
990    kNpos, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
991    30, 31
992  };
993  // Unescape to "http://foo\x30B0:\x30B0bar@www.google.com".
994  CheckAdjustedOffsets("http://foo%E3%82%B0:%E3%82%B0bar@www.google.com/", "en",
995                       kFormatUrlOmitNothing, UnescapeRule::NORMAL,
996                       dont_omit_auth_offsets);
997
998  const size_t view_source_offsets[] = {
999    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, kNpos,
1000    kNpos, kNpos, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33
1001  };
1002  CheckAdjustedOffsets("view-source:http://foo@www.google.com/", "en",
1003                       kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
1004                       view_source_offsets);
1005
1006  const size_t idn_hostname_offsets_1[] = {
1007    0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
1008    kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12,
1009    13, 14, 15, 16, 17, 18, 19
1010  };
1011  // Convert punycode to "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/".
1012  CheckAdjustedOffsets("http://xn--l8jvb1ey91xtjb.jp/foo/", "ja",
1013                       kFormatUrlOmitNothing, UnescapeRule::NORMAL,
1014                       idn_hostname_offsets_1);
1015
1016  const size_t idn_hostname_offsets_2[] = {
1017    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, kNpos, kNpos, kNpos, kNpos, kNpos,
1018    kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 14, 15, kNpos, kNpos, kNpos,
1019    kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
1020    kNpos, 19, 20, 21, 22, 23, 24
1021  };
1022  // Convert punycode to
1023  // "http://test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test/".
1024  CheckAdjustedOffsets("http://test.xn--cy2a840a.xn--1lq90ic7f1rc.test/",
1025                       "zh-CN", kFormatUrlOmitNothing, UnescapeRule::NORMAL,
1026                       idn_hostname_offsets_2);
1027
1028  const size_t unescape_offsets[] = {
1029    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
1030    21, 22, 23, 24, 25, kNpos, kNpos, 26, 27, 28, 29, 30, kNpos, kNpos, kNpos,
1031    kNpos, kNpos, kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos, kNpos,
1032    kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
1033    kNpos, 33, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos
1034  };
1035  // Unescape to "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB".
1036  CheckAdjustedOffsets(
1037      "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB",
1038      "en", kFormatUrlOmitNothing, UnescapeRule::SPACES, unescape_offsets);
1039
1040  const size_t ref_offsets[] = {
1041    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
1042    21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, kNpos, kNpos, 32, kNpos, kNpos,
1043    33
1044  };
1045  // Unescape to "http://www.google.com/foo.html#\x30B0\x30B0z".
1046  CheckAdjustedOffsets(
1047      "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z", "en",
1048      kFormatUrlOmitNothing, UnescapeRule::NORMAL, ref_offsets);
1049
1050  const size_t omit_http_offsets[] = {
1051    0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
1052    10, 11, 12, 13, 14
1053  };
1054  CheckAdjustedOffsets("http://www.google.com/", "en", kFormatUrlOmitHTTP,
1055                       UnescapeRule::NORMAL, omit_http_offsets);
1056
1057  const size_t omit_http_start_with_ftp_offsets[] = {
1058    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
1059  };
1060  CheckAdjustedOffsets("http://ftp.google.com/", "en", kFormatUrlOmitHTTP,
1061                       UnescapeRule::NORMAL, omit_http_start_with_ftp_offsets);
1062
1063  const size_t omit_all_offsets[] = {
1064    0, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, kNpos, kNpos, kNpos, kNpos,
1065    0, 1, 2, 3, 4, 5, 6, 7
1066  };
1067  CheckAdjustedOffsets("http://user@foo.com/", "en", kFormatUrlOmitAll,
1068                       UnescapeRule::NORMAL, omit_all_offsets);
1069}
1070
1071}  // namespace net
1072