1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "net/base/net_util.h"
6
7#include <algorithm>
8
9#include "base/file_path.h"
10#include "base/format_macros.h"
11#include "base/string_number_conversions.h"
12#include "base/string_util.h"
13#include "base/stringprintf.h"
14#include "base/sys_string_conversions.h"
15#include "base/test/test_file_util.h"
16#include "base/time.h"
17#include "base/utf_string_conversions.h"
18#include "googleurl/src/gurl.h"
19#include "net/base/sys_addrinfo.h"
20#include "testing/gtest/include/gtest/gtest.h"
21
22namespace net {
23
24namespace {
25
26static const size_t kNpos = string16::npos;
27
// Table row for the FileURLConversion test: a file path and the file: URL
// that is paired with it.
struct FileCase {
  wchar_t const* file;  // File path as a wide string.
  char const* url;      // The file: URL spec paired with |file|.
};
32
// Table row for the GetSpecificHeader test: the header to look up and the
// value the lookup should produce.
struct HeaderCase {
  wchar_t const* header_name;  // Name handed to GetSpecificHeader().
  wchar_t const* expected;     // Expected value (L"" for a missing header).
};
37
// Table row for the GetHeaderParamValue test: which header to fetch, which
// parameter to pull out of its value, and the parameter value expected.
struct HeaderParamCase {
  wchar_t const* header_name;  // Name handed to GetSpecificHeader().
  wchar_t const* param_name;   // Parameter handed to GetHeaderParamValue().
  wchar_t const* expected;     // Expected parameter value (L"" if none).
};
43
// Table row for the GetFileNameFromCD test.
struct FileNameCDCase {
  // A complete Content-Disposition header line.
  char const* header_field;
  // Charset name such as "iso-8859-1"; "" when none is supplied.
  char const* referrer_charset;
  // File name expected to be extracted; L"" for the failure cases.
  wchar_t const* expected;
};
49
// Accept-language style lists used by the IDN test table.  The element
// order matters: IDNTestCase::unicode_allowed is sized by this array.
const wchar_t* kLanguages[] = {
  // [0]: no language at all.
  L"",
  // [1]-[14]: a single language each.
  L"en", L"zh-CN", L"ja", L"ko", L"he", L"ar", L"ru",
  L"el", L"fr", L"de", L"pt", L"sv", L"th", L"hi",
  // [15]-[20]: comma-separated combinations.
  L"de,en", L"el,en", L"zh-TW,en", L"ko,ja", L"he,ru,en", L"zh,ru,en"
};
57
58struct IDNTestCase {
59  const char* input;
60  const wchar_t* unicode_output;
61  const bool unicode_allowed[arraysize(kLanguages)];
62};
63
// Host names for the IDN tests.  Each row gives the ASCII (punycode where
// applicable) form of a host, its Unicode form, and one flag per element of
// kLanguages.
// NOTE(review): the tests consuming this table are outside this chunk;
// presumably flag i says whether the Unicode form is shown when the language
// list is kLanguages[i] -- confirm against those tests.
// Rows that spell out only 20 flags leave the last one (the "zh,ru,en" slot)
// to be value-initialized, i.e. false.
// TODO(jungshik) This is just a random sample of languages and is far
// from exhaustive.  We may have to generate all the combinations
// of languages (powerset of a set of all the languages).
const IDNTestCase idn_cases[] = {
  // No IDN
  {"www.google.com", L"www.google.com",
   {true,  true,  true,  true,  true,
    true,  true,  true,  true,  true,
    true,  true,  true,  true,  true,
    true,  true,  true,  true,  true,
    true}},
  {"www.google.com.", L"www.google.com.",
   {true,  true,  true,  true,  true,
    true,  true,  true,  true,  true,
    true,  true,  true,  true,  true,
    true,  true,  true,  true,  true,
    true}},
  {".", L".",
   {true,  true,  true,  true,  true,
    true,  true,  true,  true,  true,
    true,  true,  true,  true,  true,
    true,  true,  true,  true,  true,
    true}},
  {"", L"",
   {true,  true,  true,  true,  true,
    true,  true,  true,  true,  true,
    true,  true,  true,  true,  true,
    true,  true,  true,  true,  true,
    true}},
  // IDN
  // Hanzi (Traditional Chinese)
  {"xn--1lq90ic7f1rc.cn", L"\x5317\x4eac\x5927\x5b78.cn",
   {true,  false, true,  true,  false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, true,  true,  false,
    true}},
  // Hanzi ('video' in Simplified Chinese : will pass only in zh-CN,zh)
  {"xn--cy2a840a.com", L"\x89c6\x9891.com",
   {true,  false, true,  false,  false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false,  false,
    true}},
  // Hanzi + '123'
  {"www.xn--123-p18d.com", L"www.\x4e00" L"123.com",
   {true,  false, true,  true,  false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, true,  true,  false,
    true}},
  // Hanzi + Latin : U+56FD is simplified and is regarded
  // as not supported in zh-TW.
  {"www.xn--hello-9n1hm04c.com", L"www.hello\x4e2d\x56fd.com",
   {false, false, true,  true,  false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, true,  false,
    true}},
  // Kanji + Kana (Japanese)
  {"xn--l8jvb1ey91xtjb.jp", L"\x671d\x65e5\x3042\x3055\x3072.jp",
   {true,  false, false, true,  false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, true,  false,
    false}},
  // Katakana including U+30FC
  // (Only 20 flags are listed here and in the next three rows; the 21st is
  // value-initialized to false.)
  {"xn--tckm4i2e.jp", L"\x30b3\x30de\x30fc\x30b9.jp",
   {true, false, false, true,  false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, true, false,
    }},
  {"xn--3ck7a7g.jp", L"\u30ce\u30f3\u30bd.jp",
   {true, false, false, true,  false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, true, false,
    }},
  // Katakana + Latin (Japanese)
  // TODO(jungshik): Change 'false' in the first element to 'true'
  // after upgrading to ICU 4.2.1 to use new uspoof_* APIs instead
  // of our IsIDNComponentInSingleScript().
  {"xn--e-efusa1mzf.jp", L"e\x30b3\x30de\x30fc\x30b9.jp",
   {false, false, false, true,  false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, true, false,
    }},
  {"xn--3bkxe.jp", L"\x30c8\x309a.jp",
   {false, false, false, true,  false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, true, false,
    }},
  // Hangul (Korean)
  {"www.xn--or3b17p6jjc.kr", L"www.\xc804\xc790\xc815\xbd80.kr",
   {true,  false, false, false, true,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, true,  false,
    false}},
  // b<u-umlaut>cher (German)
  {"xn--bcher-kva.de", L"b\x00fc" L"cher.de",
   {true,  false, false, false, false,
    false, false, false, false, true,
    true,  false,  false, false, false,
    true,  false, false, false, false,
    false}},
  // a with diaeresis
  {"www.xn--frgbolaget-q5a.se", L"www.f\x00e4rgbolaget.se",
   {true,  false, false, false, false,
    false, false, false, false, false,
    true,  false, true, false, false,
    true,  false, false, false, false,
    false}},
  // c-cedilla (French)
  {"www.xn--alliancefranaise-npb.fr", L"www.alliancefran\x00e7" L"aise.fr",
   {true,  false, false, false, false,
    false, false, false, false, true,
    false, true,  false, false, false,
    false, false, false, false, false,
    false}},
  // "cafe" with an acute accent on the e (French)
  {"xn--caf-dma.fr", L"caf\x00e9.fr",
   {true,  false, false, false, false,
    false, false, false, false, true,
    false, true,  true,  false, false,
    false, false, false, false, false,
    false}},
  // c-cedilla and a with tilde (Portuguese)
  {"xn--poema-9qae5a.com.br", L"p\x00e3oema\x00e7\x00e3.com.br",
   {true,  false, false, false, false,
    false, false, false, false, false,
    false, true,  false, false, false,
    false, false, false, false, false,
    false}},
  // s with caron
  {"xn--achy-f6a.com", L"\x0161" L"achy.com",
   {true,  false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false}},
  // TODO(jungshik) : Add examples with Cyrillic letters
  // only used in some languages written in Cyrillic.
  // Eutopia (Greek)
  {"xn--kxae4bafwg.gr", L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1.gr",
   {true,  false, false, false, false,
    false, false, false, true,  false,
    false, false, false, false, false,
    false, true,  false, false, false,
    false}},
  // Eutopia + 123 (Greek)
  {"xn---123-pldm0haj2bk.gr",
   L"\x03bf\x03c5\x03c4\x03bf\x03c0\x03af\x03b1-123.gr",
   {true,  false, false, false, false,
    false, false, false, true,  false,
    false, false, false, false, false,
    false, true,  false, false, false,
    false}},
  // Cyrillic (Russian)
  {"xn--n1aeec9b.ru", L"\x0442\x043e\x0440\x0442\x044b.ru",
   {true,  false, false, false, false,
    false, false, true,  false, false,
    false, false, false, false, false,
    false, false, false, false, true,
    true}},
  // Cyrillic + 123 (Russian)
  {"xn---123-45dmmc5f.ru", L"\x0442\x043e\x0440\x0442\x044b-123.ru",
   {true,  false, false, false, false,
    false, false, true,  false, false,
    false, false, false, false, false,
    false, false, false, false, true,
    true}},
  // Arabic
  {"xn--mgba1fmg.ar", L"\x0627\x0641\x0644\x0627\x0645.ar",
   {true,  false, false, false, false,
    false, true,  false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false}},
  // Hebrew
  {"xn--4dbib.he", L"\x05d5\x05d0\x05d4.he",
   {true,  false, false, false, false,
    true,  false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, true,
    false}},
  // Thai
  {"xn--12c2cc4ag3b4ccu.th",
   L"\x0e2a\x0e32\x0e22\x0e01\x0e32\x0e23\x0e1a\x0e34\x0e19.th",
   {true,  false, false, false, false,
    false, false, false, false, false,
    false, false, false, true,  false,
    false, false, false, false, false,
    false}},
  // Devanagari (Hindi)
  {"www.xn--l1b6a9e1b7c.in", L"www.\x0905\x0915\x094b\x0932\x093e.in",
   {true,  false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, true,
    false, false, false, false, false,
    false}},
  // Invalid IDN
  {"xn--hello?world.com", NULL,
   {false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false}},
  // Unsafe IDNs
  // "payp<alpha>l.com"
  {"www.xn--paypl-g9d.com", L"payp\x03b1l.com",
   {false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false}},
  // google.gr with Greek omicron and epsilon
  {"xn--ggl-6xc1ca.gr", L"g\x03bf\x03bfgl\x03b5.gr",
   {false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false}},
  // google.ru with Cyrillic o
  {"xn--ggl-tdd6ba.ru", L"g\x043e\x043egl\x0435.ru",
   {false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false}},
  // h<e with acute>llo<China in Han>.cn
  {"xn--hllo-bpa7979ih5m.cn", L"h\x00e9llo\x4e2d\x56fd.cn",
   {false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false}},
  // <Greek rho><Cyrillic a><Cyrillic u>.ru
  {"xn--2xa6t2b.ru", L"\x03c1\x0430\x0443.ru",
   {false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false, false, false, false, false,
    false}},
  // One that's really long that will force a buffer realloc
  {"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
       "aaaaaaa",
   L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
       L"aaaaaaaa",
   {true,  true,  true,  true,  true,
    true,  true,  true,  true,  true,
    true,  true,  true,  true,  true,
    true,  true,  true,  true,  true,
    true}},
  // Test cases for characters we blacklisted although allowed in IDN.
  // Embedded spaces will be turned to %20 in the display.
  // TODO(jungshik): We need to have more cases. This is a typical
  // data-driven trap. The following test cases need to be separated
  // and tested only for a couple of languages.
  {"xn--osd3820f24c.kr", L"\xac00\xb098\x115f.kr",
    {false, false, false, false, false,
     false, false, false, false, false,
     false, false, false, false, false,
     false, false, false, false, false,
     false}},
  // (The next four rows list only 20 flags; the 21st is value-initialized
  // to false.)
  {"www.xn--google-ho0coa.com", L"www.\x2039google\x203a.com",
    {false, false, false, false, false,
     false, false, false, false, false,
     false, false, false, false, false,
     false, false, false, false, false,
  }},
  {"google.xn--comabc-k8d", L"google.com\x0338" L"abc",
    {false, false, false, false, false,
     false, false, false, false, false,
     false, false, false, false, false,
     false, false, false, false, false,
  }},
  {"google.xn--com-oh4ba.evil.jp", L"google.com\x309a\x309a.evil.jp",
    {false, false, false, false, false,
     false, false, false, false, false,
     false, false, false, false, false,
     false, false, false, false, false,
  }},
  {"google.xn--comevil-v04f.jp", L"google.com\x30ce" L"evil.jp",
    {false, false, false, false, false,
     false, false, false, false, false,
     false, false, false, false, false,
     false, false, false, false, false,
  }},
#if 0
  // These two cases are special. We need a separate test.
  // U+3000 and U+3002 are normalized to ASCII space and dot.
  {"xn-- -kq6ay5z.cn", L"\x4e2d\x56fd\x3000.cn",
    {false, false, true,  false, false,
     false, false, false, false, false,
     false, false, false, false, false,
     false, false, true,  false, false,
     true}},
  {"xn--fiqs8s.cn", L"\x4e2d\x56fd\x3002" L"cn",
    {false, false, true,  false, false,
     false, false, false, false, false,
     false, false, false, false, false,
     false, false, true,  false, false,
     true}},
#endif
};
373
// Holds an input/output offset pair.
// NOTE(review): the code consuming this struct is outside this chunk;
// presumably a test feeds |input_offset| to an offset-adjusting function and
// expects |output_offset| back -- confirm against the caller.
struct AdjustOffsetCase {
  size_t input_offset;
  size_t output_offset;
};
378
// Table row holding a host, a desired TLD and an expected boolean result.
// NOTE(review): the test reading these rows is outside this chunk; the field
// roles below are read off the field names -- confirm against that test.
struct CompliantHostCase {
  char const* host;         // Host name under test.
  char const* desired_tld;  // TLD supplied alongside |host|.
  bool expected_output;     // Result the check is expected to return.
};
384
// Table row describing one suggested-file-name scenario.
// NOTE(review): the test reading these rows is outside this chunk; the field
// roles below are read off the field names -- confirm against that test.
struct SuggestedFilenameCase {
  char const* url;                   // URL of the resource.
  char const* content_disp_header;   // Content-Disposition header text.
  char const* referrer_charset;      // Charset name of the referrer.
  wchar_t const* default_filename;   // Fallback file name.
  wchar_t const* expected_filename;  // File name expected as the result.
};
392
393struct UrlTestData {
394  const char* description;
395  const char* input;
396  const char* languages;
397  FormatUrlTypes format_types;
398  UnescapeRule::Type escape_rules;
399  const wchar_t* output;  // Use |wchar_t| to handle Unicode constants easily.
400  size_t prefix_len;
401};
402
403// Returns an addrinfo for the given 32-bit address (IPv4.)
404// The result lives in static storage, so don't delete it.
405// |bytes| should be an array of length 4.
406const struct addrinfo* GetIPv4Address(const uint8* bytes, int port) {
407  static struct addrinfo static_ai;
408  static struct sockaddr_in static_addr4;
409
410  struct addrinfo* ai = &static_ai;
411  ai->ai_socktype = SOCK_STREAM;
412  memset(ai, 0, sizeof(static_ai));
413
414  ai->ai_family = AF_INET;
415  ai->ai_addrlen = sizeof(static_addr4);
416
417  struct sockaddr_in* addr4 = &static_addr4;
418  memset(addr4, 0, sizeof(static_addr4));
419  addr4->sin_port = htons(port);
420  addr4->sin_family = ai->ai_family;
421  memcpy(&addr4->sin_addr, bytes, 4);
422
423  ai->ai_addr = (sockaddr*)addr4;
424  return ai;
425}
426
427// Returns a addrinfo for the given 128-bit address (IPv6.)
428// The result lives in static storage, so don't delete it.
429// |bytes| should be an array of length 16.
430const struct addrinfo* GetIPv6Address(const uint8* bytes, int port) {
431  static struct addrinfo static_ai;
432  static struct sockaddr_in6 static_addr6;
433
434  struct addrinfo* ai = &static_ai;
435  ai->ai_socktype = SOCK_STREAM;
436  memset(ai, 0, sizeof(static_ai));
437
438  ai->ai_family = AF_INET6;
439  ai->ai_addrlen = sizeof(static_addr6);
440
441  struct sockaddr_in6* addr6 = &static_addr6;
442  memset(addr6, 0, sizeof(static_addr6));
443  addr6->sin6_port = htons(port);
444  addr6->sin6_family = ai->ai_family;
445  memcpy(&addr6->sin6_addr, bytes, 16);
446
447  ai->ai_addr = (sockaddr*)addr6;
448  return ai;
449}
450
// A helper for IDN*{Fast,Slow}.
// Tags both |expected| and |actual| with the suffix "::<language list>" so
// that a failing sub-case can be identified straight from the test output.
void AppendLanguagesToOutputs(const wchar_t* languages,
                              std::wstring* expected,
                              std::wstring* actual) {
  // Build the suffix once and attach it to each string in turn.
  std::wstring suffix(L"::");
  suffix += languages;

  *expected += suffix;
  *actual += suffix;
}
462
463// Helper to strignize an IP number (used to define expectations).
464std::string DumpIPNumber(const IPAddressNumber& v) {
465  std::string out;
466  for (size_t i = 0; i < v.size(); ++i) {
467    if (i != 0)
468      out.append(",");
469    out.append(base::IntToString(static_cast<int>(v[i])));
470  }
471  return out;
472}
473
474}  // anonymous namespace
475
476TEST(NetUtilTest, FileURLConversion) {
477  // a list of test file names and the corresponding URLs
478  const FileCase round_trip_cases[] = {
479#if defined(OS_WIN)
480    {L"C:\\foo\\bar.txt", "file:///C:/foo/bar.txt"},
481    {L"\\\\some computer\\foo\\bar.txt",
482     "file://some%20computer/foo/bar.txt"}, // UNC
483    {L"D:\\Name;with%some symbols*#",
484     "file:///D:/Name%3Bwith%25some%20symbols*%23"},
485    // issue 14153: To be tested with the OS default codepage other than 1252.
486    {L"D:\\latin1\\caf\x00E9\x00DD.txt",
487     "file:///D:/latin1/caf%C3%A9%C3%9D.txt"},
488    {L"D:\\otherlatin\\caf\x0119.txt",
489     "file:///D:/otherlatin/caf%C4%99.txt"},
490    {L"D:\\greek\\\x03B1\x03B2\x03B3.txt",
491     "file:///D:/greek/%CE%B1%CE%B2%CE%B3.txt"},
492    {L"D:\\Chinese\\\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc",
493     "file:///D:/Chinese/%E6%89%80%E6%9C%89%E4%B8%AD%E6%96%87%E7%BD%91"
494         "%E9%A1%B5.doc"},
495    {L"D:\\plane1\\\xD835\xDC00\xD835\xDC01.txt",  // Math alphabet "AB"
496     "file:///D:/plane1/%F0%9D%90%80%F0%9D%90%81.txt"},
497#elif defined(OS_POSIX)
498    {L"/foo/bar.txt", "file:///foo/bar.txt"},
499    {L"/foo/BAR.txt", "file:///foo/BAR.txt"},
500    {L"/C:/foo/bar.txt", "file:///C:/foo/bar.txt"},
501    {L"/some computer/foo/bar.txt", "file:///some%20computer/foo/bar.txt"},
502    {L"/Name;with%some symbols*#", "file:///Name%3Bwith%25some%20symbols*%23"},
503    {L"/latin1/caf\x00E9\x00DD.txt", "file:///latin1/caf%C3%A9%C3%9D.txt"},
504    {L"/otherlatin/caf\x0119.txt", "file:///otherlatin/caf%C4%99.txt"},
505    {L"/greek/\x03B1\x03B2\x03B3.txt", "file:///greek/%CE%B1%CE%B2%CE%B3.txt"},
506    {L"/Chinese/\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc",
507     "file:///Chinese/%E6%89%80%E6%9C%89%E4%B8%AD%E6%96%87%E7%BD"
508         "%91%E9%A1%B5.doc"},
509    {L"/plane1/\x1D400\x1D401.txt",  // Math alphabet "AB"
510     "file:///plane1/%F0%9D%90%80%F0%9D%90%81.txt"},
511#endif
512  };
513
514  // First, we'll test that we can round-trip all of the above cases of URLs
515  FilePath output;
516  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(round_trip_cases); i++) {
517    // convert to the file URL
518    GURL file_url(FilePathToFileURL(
519                      file_util::WStringAsFilePath(round_trip_cases[i].file)));
520    EXPECT_EQ(round_trip_cases[i].url, file_url.spec());
521
522    // Back to the filename.
523    EXPECT_TRUE(FileURLToFilePath(file_url, &output));
524    EXPECT_EQ(round_trip_cases[i].file, file_util::FilePathAsWString(output));
525  }
526
527  // Test that various file: URLs get decoded into the correct file type
528  FileCase url_cases[] = {
529#if defined(OS_WIN)
530    {L"C:\\foo\\bar.txt", "file:c|/foo\\bar.txt"},
531    {L"C:\\foo\\bar.txt", "file:/c:/foo/bar.txt"},
532    {L"\\\\foo\\bar.txt", "file://foo\\bar.txt"},
533    {L"C:\\foo\\bar.txt", "file:///c:/foo/bar.txt"},
534    {L"\\\\foo\\bar.txt", "file:////foo\\bar.txt"},
535    {L"\\\\foo\\bar.txt", "file:/foo/bar.txt"},
536    {L"\\\\foo\\bar.txt", "file://foo\\bar.txt"},
537    {L"C:\\foo\\bar.txt", "file:\\\\\\c:/foo/bar.txt"},
538#elif defined(OS_POSIX)
539    {L"/c:/foo/bar.txt", "file:/c:/foo/bar.txt"},
540    {L"/c:/foo/bar.txt", "file:///c:/foo/bar.txt"},
541    {L"/foo/bar.txt", "file:/foo/bar.txt"},
542    {L"/c:/foo/bar.txt", "file:\\\\\\c:/foo/bar.txt"},
543    {L"/foo/bar.txt", "file:foo/bar.txt"},
544    {L"/bar.txt", "file://foo/bar.txt"},
545    {L"/foo/bar.txt", "file:///foo/bar.txt"},
546    {L"/foo/bar.txt", "file:////foo/bar.txt"},
547    {L"/foo/bar.txt", "file:////foo//bar.txt"},
548    {L"/foo/bar.txt", "file:////foo///bar.txt"},
549    {L"/foo/bar.txt", "file:////foo////bar.txt"},
550    {L"/c:/foo/bar.txt", "file:\\\\\\c:/foo/bar.txt"},
551    {L"/c:/foo/bar.txt", "file:c:/foo/bar.txt"},
552    // We get these wrong because GURL turns back slashes into forward
553    // slashes.
554    //{L"/foo%5Cbar.txt", "file://foo\\bar.txt"},
555    //{L"/c|/foo%5Cbar.txt", "file:c|/foo\\bar.txt"},
556    //{L"/foo%5Cbar.txt", "file://foo\\bar.txt"},
557    //{L"/foo%5Cbar.txt", "file:////foo\\bar.txt"},
558    //{L"/foo%5Cbar.txt", "file://foo\\bar.txt"},
559#endif
560  };
561  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(url_cases); i++) {
562    FileURLToFilePath(GURL(url_cases[i].url), &output);
563    EXPECT_EQ(url_cases[i].file, file_util::FilePathAsWString(output));
564  }
565
566  // Unfortunately, UTF8ToWide discards invalid UTF8 input.
567#ifdef BUG_878908_IS_FIXED
568  // Test that no conversion happens if the UTF-8 input is invalid, and that
569  // the input is preserved in UTF-8
570  const char invalid_utf8[] = "file:///d:/Blah/\xff.doc";
571  const wchar_t invalid_wide[] = L"D:\\Blah\\\xff.doc";
572  EXPECT_TRUE(FileURLToFilePath(
573      GURL(std::string(invalid_utf8)), &output));
574  EXPECT_EQ(std::wstring(invalid_wide), output);
575#endif
576
577  // Test that if a file URL is malformed, we get a failure
578  EXPECT_FALSE(FileURLToFilePath(GURL("filefoobar"), &output));
579}
580
581TEST(NetUtilTest, GetIdentityFromURL) {
582  struct {
583    const char* input_url;
584    const char* expected_username;
585    const char* expected_password;
586  } tests[] = {
587    {
588      "http://username:password@google.com",
589      "username",
590      "password",
591    },
592    { // Test for http://crbug.com/19200
593      "http://username:p@ssword@google.com",
594      "username",
595      "p@ssword",
596    },
597    { // Special URL characters should be unescaped.
598      "http://username:p%3fa%26s%2fs%23@google.com",
599      "username",
600      "p?a&s/s#",
601    },
602    { // Username contains %20.
603      "http://use rname:password@google.com",
604      "use rname",
605      "password",
606    },
607    { // Keep %00 as is.
608      "http://use%00rname:password@google.com",
609      "use%00rname",
610      "password",
611    },
612    { // Use a '+' in the username.
613      "http://use+rname:password@google.com",
614      "use+rname",
615      "password",
616    },
617    { // Use a '&' in the password.
618      "http://username:p&ssword@google.com",
619      "username",
620      "p&ssword",
621    },
622  };
623  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
624    SCOPED_TRACE(base::StringPrintf("Test[%" PRIuS "]: %s", i,
625                                    tests[i].input_url));
626    GURL url(tests[i].input_url);
627
628    string16 username, password;
629    GetIdentityFromURL(url, &username, &password);
630
631    EXPECT_EQ(ASCIIToUTF16(tests[i].expected_username), username);
632    EXPECT_EQ(ASCIIToUTF16(tests[i].expected_password), password);
633  }
634}
635
636// Try extracting a username which was encoded with UTF8.
637TEST(NetUtilTest, GetIdentityFromURL_UTF8) {
638  GURL url(WideToUTF16(L"http://foo:\x4f60\x597d@blah.com"));
639
640  EXPECT_EQ("foo", url.username());
641  EXPECT_EQ("%E4%BD%A0%E5%A5%BD", url.password());
642
643  // Extract the unescaped identity.
644  string16 username, password;
645  GetIdentityFromURL(url, &username, &password);
646
647  // Verify that it was decoded as UTF8.
648  EXPECT_EQ(ASCIIToUTF16("foo"), username);
649  EXPECT_EQ(WideToUTF16(L"\x4f60\x597d"), password);
650}
651
// Just a bunch of fake headers, used as input by the GetSpecificHeader and
// GetHeaderParamValue tests.  Lines are separated by '\n' and the final line
// has no terminator.  Some names occur more than once (Set-Cookie,
// X-Google-Google2, Transfer-Encoding).  X-Malformed has an unbalanced
// closing quote in its parameter value and X-Malformed2 has an empty one.
const wchar_t* google_headers =
    L"HTTP/1.1 200 OK\n"
    L"Content-TYPE: text/html; charset=utf-8\n"
    L"Content-disposition: attachment; filename=\"download.pdf\"\n"
    L"Content-Length: 378557\n"
    L"X-Google-Google1: 314159265\n"
    L"X-Google-Google2: aaaa2:7783,bbb21:9441\n"
    L"X-Google-Google4: home\n"
    L"Transfer-Encoding: chunked\n"
    L"Set-Cookie: HEHE_AT=6666x66beef666x6-66xx6666x66; Path=/mail\n"
    L"Set-Cookie: HEHE_HELP=owned:0;Path=/\n"
    L"Set-Cookie: S=gmail=Xxx-beefbeefbeef_beefb:gmail_yj=beefbeef000beefbee"
        L"fbee:gmproxy=bee-fbeefbe; Domain=.google.com; Path=/\n"
    L"X-Google-Google2: /one/two/three/four/five/six/seven-height/nine:9411\n"
    L"Server: GFE/1.3\n"
    L"Transfer-Encoding: chunked\n"
    L"Date: Mon, 13 Nov 2006 21:38:09 GMT\n"
    L"Expires: Tue, 14 Nov 2006 19:23:58 GMT\n"
    L"X-Malformed: bla; arg=test\"\n"
    L"X-Malformed2: bla; arg=\n"
    L"X-Test: bla; arg1=val1; arg2=val2";
674
675TEST(NetUtilTest, GetSpecificHeader) {
676  const HeaderCase tests[] = {
677    {L"content-type", L"text/html; charset=utf-8"},
678    {L"CONTENT-LENGTH", L"378557"},
679    {L"Date", L"Mon, 13 Nov 2006 21:38:09 GMT"},
680    {L"Bad-Header", L""},
681    {L"", L""},
682  };
683
684  // Test first with google_headers.
685  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
686    std::wstring result = GetSpecificHeader(google_headers,
687                                                 tests[i].header_name);
688    EXPECT_EQ(result, tests[i].expected);
689  }
690
691  // Test again with empty headers.
692  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
693    std::wstring result = GetSpecificHeader(L"", tests[i].header_name);
694    EXPECT_EQ(result, std::wstring());
695  }
696}
697
698TEST(NetUtilTest, GetHeaderParamValue) {
699  const HeaderParamCase tests[] = {
700    {L"Content-type", L"charset", L"utf-8"},
701    {L"content-disposition", L"filename", L"download.pdf"},
702    {L"Content-Type", L"badparam", L""},
703    {L"X-Malformed", L"arg", L"test\""},
704    {L"X-Malformed2", L"arg", L""},
705    {L"X-Test", L"arg1", L"val1"},
706    {L"X-Test", L"arg2", L"val2"},
707    {L"Bad-Header", L"badparam", L""},
708    {L"Bad-Header", L"", L""},
709    {L"", L"badparam", L""},
710    {L"", L"", L""},
711  };
712  // TODO(mpcomplete): add tests for other formats of headers.
713
714  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
715    std::wstring header_value =
716        GetSpecificHeader(google_headers, tests[i].header_name);
717    std::wstring result =
718        GetHeaderParamValue(header_value, tests[i].param_name,
719                            QuoteRule::REMOVE_OUTER_QUOTES);
720    EXPECT_EQ(result, tests[i].expected);
721  }
722
723  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
724    std::wstring header_value =
725        GetSpecificHeader(L"", tests[i].header_name);
726    std::wstring result =
727        GetHeaderParamValue(header_value, tests[i].param_name,
728                            QuoteRule::REMOVE_OUTER_QUOTES);
729    EXPECT_EQ(result, std::wstring());
730  }
731}
732
733TEST(NetUtilTest, GetHeaderParamValueQuotes) {
734  struct {
735    const char* header;
736    const char* expected_with_quotes;
737    const char* expected_without_quotes;
738  } tests[] = {
739    {"filename=foo", "foo", "foo"},
740    {"filename=\"foo\"", "\"foo\"", "foo"},
741    {"filename=foo\"", "foo\"", "foo\""},
742    {"filename=fo\"o", "fo\"o", "fo\"o"},
743  };
744
745  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
746    std::string actual_with_quotes =
747        GetHeaderParamValue(tests[i].header, "filename",
748                            QuoteRule::KEEP_OUTER_QUOTES);
749    std::string actual_without_quotes =
750        GetHeaderParamValue(tests[i].header, "filename",
751                            QuoteRule::REMOVE_OUTER_QUOTES);
752    EXPECT_EQ(tests[i].expected_with_quotes, actual_with_quotes)
753        << "Failed while processing: " << tests[i].header;
754    EXPECT_EQ(tests[i].expected_without_quotes, actual_without_quotes)
755        << "Failed while processing: " << tests[i].header;
756  }
757}
758
759TEST(NetUtilTest, GetFileNameFromCD) {
760  const FileNameCDCase tests[] = {
761    // Test various forms of C-D header fields emitted by web servers.
762    {"content-disposition: inline; filename=\"abcde.pdf\"", "", L"abcde.pdf"},
763    {"content-disposition: inline; name=\"abcde.pdf\"", "", L"abcde.pdf"},
764    {"content-disposition: attachment; filename=abcde.pdf", "", L"abcde.pdf"},
765    {"content-disposition: attachment; name=abcde.pdf", "", L"abcde.pdf"},
766    {"content-disposition: attachment; filename=abc,de.pdf", "", L"abc,de.pdf"},
767    {"content-disposition: filename=abcde.pdf", "", L"abcde.pdf"},
768    {"content-disposition: filename= abcde.pdf", "", L"abcde.pdf"},
769    {"content-disposition: filename =abcde.pdf", "", L"abcde.pdf"},
770    {"content-disposition: filename = abcde.pdf", "", L"abcde.pdf"},
771    {"content-disposition: filename\t=abcde.pdf", "", L"abcde.pdf"},
772    {"content-disposition: filename \t\t  =abcde.pdf", "", L"abcde.pdf"},
773    {"content-disposition: name=abcde.pdf", "", L"abcde.pdf"},
774    {"content-disposition: inline; filename=\"abc%20de.pdf\"", "",
775     L"abc de.pdf"},
776    // Unbalanced quotation mark
777    {"content-disposition: filename=\"abcdef.pdf", "", L"abcdef.pdf"},
778    // Whitespaces are converted to a space.
779    {"content-disposition: inline; filename=\"abc  \t\nde.pdf\"", "",
780     L"abc    de.pdf"},
781    // %-escaped UTF-8
782    {"Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0%20"
783     "%EC%98%88%EC%88%A0.jpg\"", "", L"\xc608\xc220 \xc608\xc220.jpg"},
784    {"Content-Disposition: attachment; filename=\"%F0%90%8C%B0%F0%90%8C%B1"
785     "abc.jpg\"", "", L"\U00010330\U00010331abc.jpg"},
786    {"Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0 \n"
787     "%EC%98%88%EC%88%A0.jpg\"", "", L"\xc608\xc220  \xc608\xc220.jpg"},
788    // RFC 2047 with various charsets and Q/B encodings
789    {"Content-Disposition: attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD="
790     "D13=2Epng?=\"", "", L"\x82b8\x8853" L"3.png"},
791    {"Content-Disposition: attachment; filename==?eUc-Kr?b?v7m8+iAzLnBuZw==?=",
792     "", L"\xc608\xc220 3.png"},
793    {"Content-Disposition: attachment; filename==?utf-8?Q?=E8=8A=B8=E8"
794     "=A1=93_3=2Epng?=", "", L"\x82b8\x8853 3.png"},
795    {"Content-Disposition: attachment; filename==?utf-8?Q?=F0=90=8C=B0"
796     "_3=2Epng?=", "", L"\U00010330 3.png"},
797    {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=e9_=2epng?=\"",
798     "", L"caf\x00e9 .png"},
799    // Space after an encoded word should be removed.
800    {"Content-Disposition: inline; filename=\"=?iso88591?Q?caf=E9_?= .png\"",
801     "", L"caf\x00e9 .png"},
802    // Two encoded words with different charsets (not very likely to be emitted
803    // by web servers in the wild). Spaces between them are removed.
804    {"Content-Disposition: inline; filename=\"=?euc-kr?b?v7m8+iAz?="
805     " =?ksc5601?q?=BF=B9=BC=FA=2Epng?=\"", "",
806     L"\xc608\xc220 3\xc608\xc220.png"},
807    {"Content-Disposition: attachment; filename=\"=?windows-1252?Q?caf=E9?="
808     "  =?iso-8859-7?b?4eI=?= .png\"", "", L"caf\x00e9\x03b1\x03b2.png"},
809    // Non-ASCII string is passed through and treated as UTF-8 as long as
810    // it's valid as UTF-8 and regardless of |referrer_charset|.
811    {"Content-Disposition: attachment; filename=caf\xc3\xa9.png",
812     "iso-8859-1", L"caf\x00e9.png"},
813    {"Content-Disposition: attachment; filename=caf\xc3\xa9.png",
814     "", L"caf\x00e9.png"},
815    // Non-ASCII/Non-UTF-8 string. Fall back to the referrer charset.
816    {"Content-Disposition: attachment; filename=caf\xe5.png",
817     "windows-1253", L"caf\x03b5.png"},
818#if 0
819    // Non-ASCII/Non-UTF-8 string. Fall back to the native codepage.
820    // TODO(jungshik): We need to set the OS default codepage
821    // to a specific value before testing. On Windows, we can use
822    // SetThreadLocale().
823    {"Content-Disposition: attachment; filename=\xb0\xa1\xb0\xa2.png",
824     "", L"\xac00\xac01.png"},
825#endif
826    // Failure cases
827    // Invalid hex-digit "G"
828    {"Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=", "",
829     L""},
830    // Incomplete RFC 2047 encoded-word (missing '='' at the end)
831    {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?", "", L""},
832    // Extra character at the end of an encoded word
833    {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?==",
834     "", L""},
835    // Extra token at the end of an encoded word
836    {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?",
837     "", L""},
838    {"Content-Disposition: attachment; filename==?iso88591?Q?caf=E3?=?=",
839     "",  L""},
840    // Incomplete hex-escaped chars
841    {"Content-Disposition: attachment; filename==?windows-1252?Q?=63=61=E?=",
842     "", L""},
843    {"Content-Disposition: attachment; filename=%EC%98%88%EC%88%A", "", L""},
844    // %-escaped non-UTF-8 encoding is an "error"
845    {"Content-Disposition: attachment; filename=%B7%DD%BD%D1.png", "", L""},
846    // Two RFC 2047 encoded words in a row without a space is an error.
847    {"Content-Disposition: attachment; filename==?windows-1252?Q?caf=E3?="
848     "=?iso-8859-7?b?4eIucG5nCg==?=", "", L""},
849
850    // RFC 5987 tests with Filename*  : see http://tools.ietf.org/html/rfc5987
851    {"Content-Disposition: attachment; filename*=foo.html", "", L""},
852    {"Content-Disposition: attachment; filename*=foo'.html", "", L""},
853    {"Content-Disposition: attachment; filename*=''foo'.html", "", L""},
854    {"Content-Disposition: attachment; filename*=''foo.html'", "", L""},
855    {"Content-Disposition: attachment; filename*=''f\"oo\".html'", "", L""},
856    {"Content-Disposition: attachment; filename*=bogus_charset''foo.html'",
857     "", L""},
858    {"Content-Disposition: attachment; filename*='en'foo.html'", "", L""},
859    {"Content-Disposition: attachment; filename*=iso-8859-1'en'foo.html", "",
860      L"foo.html"},
861    {"Content-Disposition: attachment; filename*=utf-8'en'foo.html", "",
862      L"foo.html"},
863    // charset cannot be omitted.
864    {"Content-Disposition: attachment; filename*='es'f\xfa.html'", "", L""},
865    // Non-ASCII bytes are not allowed.
866    {"Content-Disposition: attachment; filename*=iso-8859-1'es'f\xfa.html", "",
867      L""},
868    {"Content-Disposition: attachment; filename*=utf-8'es'f\xce\xba.html", "",
869      L""},
870    // TODO(jshin): Space should be %-encoded, but currently, we allow
871    // spaces.
872    {"Content-Disposition: inline; filename*=iso88591''cafe foo.png", "",
873      L"cafe foo.png"},
874
875    // Filename* tests converted from Q-encoded tests above.
876    {"Content-Disposition: attachment; filename*=EUC-JP''%B7%DD%BD%D13%2Epng",
877     "", L"\x82b8\x8853" L"3.png"},
878    {"Content-Disposition: attachment; filename*=utf-8''"
879      "%E8%8A%B8%E8%A1%93%203%2Epng", "", L"\x82b8\x8853 3.png"},
880    {"Content-Disposition: attachment; filename*=utf-8''%F0%90%8C%B0 3.png", "",
881      L"\U00010330 3.png"},
882    {"Content-Disposition: inline; filename*=Euc-Kr'ko'%BF%B9%BC%FA%2Epng", "",
883     L"\xc608\xc220.png"},
884    {"Content-Disposition: attachment; filename*=windows-1252''caf%E9.png", "",
885      L"caf\x00e9.png"},
886
887    // http://greenbytes.de/tech/tc2231/ filename* test cases.
888    // attwithisofn2231iso
889    {"Content-Disposition: attachment; filename*=iso-8859-1''foo-%E4.html", "",
890      L"foo-\xe4.html"},
891    // attwithfn2231utf8
892    {"Content-Disposition: attachment; filename*="
893      "UTF-8''foo-%c3%a4-%e2%82%ac.html", "", L"foo-\xe4-\x20ac.html"},
894    // attwithfn2231noc : no encoding specified but UTF-8 is used.
895    {"Content-Disposition: attachment; filename*=''foo-%c3%a4-%e2%82%ac.html",
896      "", L""},
897    // attwithfn2231utf8comp
898    {"Content-Disposition: attachment; filename*=UTF-8''foo-a%cc%88.html", "",
899      L"foo-\xe4.html"},
900#ifdef ICU_SHOULD_FAIL_CONVERSION_ON_INVALID_CHARACTER
901    // This does not work because we treat ISO-8859-1 synonymous with
902    // Windows-1252 per HTML5. For HTTP, in theory, we're not
903    // supposed to.
904    // attwithfn2231utf8-bad
905    {"Content-Disposition: attachment; filename*="
906      "iso-8859-1''foo-%c3%a4-%e2%82%ac.html", "", L""},
907#endif
908    // attwithfn2231ws1
909    {"Content-Disposition: attachment; filename *=UTF-8''foo-%c3%a4.html", "",
910      L""},
911    // attwithfn2231ws2
912    {"Content-Disposition: attachment; filename*= UTF-8''foo-%c3%a4.html", "",
913      L"foo-\xe4.html"},
914    // attwithfn2231ws3
915    {"Content-Disposition: attachment; filename* =UTF-8''foo-%c3%a4.html", "",
916      L"foo-\xe4.html"},
917    // attwithfn2231quot
918    {"Content-Disposition: attachment; filename*=\"UTF-8''foo-%c3%a4.html\"",
919      "", L""},
920    // attfnboth
921    {"Content-Disposition: attachment; filename=\"foo-ae.html\"; "
922      "filename*=UTF-8''foo-%c3%a4.html", "", L"foo-\xe4.html"},
923    // attfnboth2
924    {"Content-Disposition: attachment; filename*=UTF-8''foo-%c3%a4.html; "
925      "filename=\"foo-ae.html\"", "", L"foo-\xe4.html"},
926    // attnewandfn
927    {"Content-Disposition: attachment; foobar=x; filename=\"foo.html\"", "",
928      L"foo.html"},
929  };
930  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
931    EXPECT_EQ(tests[i].expected,
932              UTF8ToWide(GetFileNameFromCD(tests[i].header_field,
933                                           tests[i].referrer_charset)))
934        << "Failed on input: " << tests[i].header_field;
935  }
936}
937
938TEST(NetUtilTest, IDNToUnicodeFast) {
939  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_cases); i++) {
940    for (size_t j = 0; j < arraysize(kLanguages); j++) {
941      // ja || zh-TW,en || ko,ja -> IDNToUnicodeSlow
942      if (j == 3 || j == 17 || j == 18)
943        continue;
944      std::wstring output(IDNToUnicode(idn_cases[i].input,
945          strlen(idn_cases[i].input), kLanguages[j], NULL));
946      std::wstring expected(idn_cases[i].unicode_allowed[j] ?
947          idn_cases[i].unicode_output : ASCIIToWide(idn_cases[i].input));
948      AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
949      EXPECT_EQ(expected, output);
950    }
951  }
952}
953
954TEST(NetUtilTest, IDNToUnicodeSlow) {
955  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_cases); i++) {
956    for (size_t j = 0; j < arraysize(kLanguages); j++) {
957      // !(ja || zh-TW,en || ko,ja) -> IDNToUnicodeFast
958      if (!(j == 3 || j == 17 || j == 18))
959        continue;
960      std::wstring output(IDNToUnicode(idn_cases[i].input,
961          strlen(idn_cases[i].input), kLanguages[j], NULL));
962      std::wstring expected(idn_cases[i].unicode_allowed[j] ?
963          idn_cases[i].unicode_output : ASCIIToWide(idn_cases[i].input));
964      AppendLanguagesToOutputs(kLanguages[j], &expected, &output);
965      EXPECT_EQ(expected, output);
966    }
967  }
968}
969
970TEST(NetUtilTest, IDNToUnicodeAdjustOffset) {
971  const AdjustOffsetCase adjust_cases[] = {
972    {0, 0},
973    {2, 2},
974    {4, 4},
975    {5, 5},
976    {6, string16::npos},
977    {16, string16::npos},
978    {17, 7},
979    {18, 8},
980    {19, string16::npos},
981    {25, string16::npos},
982    {34, 12},
983    {35, 13},
984    {38, 16},
985    {39, string16::npos},
986    {string16::npos, string16::npos},
987  };
988  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(adjust_cases); ++i) {
989    size_t offset = adjust_cases[i].input_offset;
990    // "test.\x89c6\x9891.\x5317\x4eac\x5927\x5b78.test"
991    IDNToUnicode("test.xn--cy2a840a.xn--1lq90ic7f1rc.test", 39, L"zh-CN",
992                      &offset);
993    EXPECT_EQ(adjust_cases[i].output_offset, offset);
994  }
995
996  std::vector<size_t> offsets;
997  for (size_t i = 0; i < 40; ++i)
998    offsets.push_back(i);
999  IDNToUnicodeWithOffsets("test.xn--cy2a840a.xn--1lq90ic7f1rc.test", 39,
1000                          L"zh-CN", &offsets);
1001  size_t expected[] = {0, 1, 2, 3, 4, 5, kNpos, kNpos, kNpos, kNpos, kNpos,
1002                       kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 7, 8, kNpos,
1003                       kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
1004                       kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 12, 13, 14, 15,
1005                       16, kNpos};
1006  ASSERT_EQ(40U, arraysize(expected));
1007  for (size_t i = 0; i < 40; ++i)
1008    EXPECT_EQ(expected[i], offsets[i]);
1009}
1010
1011TEST(NetUtilTest, CompliantHost) {
1012  const CompliantHostCase compliant_host_cases[] = {
1013    {"", "", false},
1014    {"a", "", true},
1015    {"-", "", false},
1016    {".", "", false},
1017    {"9", "", false},
1018    {"9", "a", true},
1019    {"9a", "", false},
1020    {"9a", "a", true},
1021    {"a.", "", true},
1022    {"a.a", "", true},
1023    {"9.a", "", true},
1024    {"a.9", "", false},
1025    {"_9a", "", false},
1026    {"a.a9", "", true},
1027    {"a.9a", "", false},
1028    {"a+9a", "", false},
1029    {"1-.a-b", "", false},
1030    {"1-2.a_b", "", true},
1031    {"a.b.c.d.e", "", true},
1032    {"1.2.3.4.e", "", true},
1033    {"a.b.c.d.5", "", false},
1034    {"1.2.3.4.e.", "", true},
1035    {"a.b.c.d.5.", "", false},
1036  };
1037
1038  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(compliant_host_cases); ++i) {
1039    EXPECT_EQ(compliant_host_cases[i].expected_output,
1040        IsCanonicalizedHostCompliant(compliant_host_cases[i].host,
1041                                          compliant_host_cases[i].desired_tld));
1042  }
1043}
1044
1045TEST(NetUtilTest, StripWWW) {
1046  EXPECT_EQ(string16(), StripWWW(string16()));
1047  EXPECT_EQ(string16(), StripWWW(ASCIIToUTF16("www.")));
1048  EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("www.blah")));
1049  EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("blah")));
1050}
1051
1052TEST(NetUtilTest, GetSuggestedFilename) {
1053  const SuggestedFilenameCase test_cases[] = {
1054    {"http://www.google.com/",
1055     "Content-disposition: attachment; filename=test.html",
1056     "",
1057     L"",
1058     L"test.html"},
1059    {"http://www.google.com/",
1060     "Content-disposition: attachment; filename=\"test.html\"",
1061     "",
1062     L"",
1063     L"test.html"},
1064    {"http://www.google.com/path/test.html",
1065     "Content-disposition: attachment",
1066     "",
1067     L"",
1068     L"test.html"},
1069    {"http://www.google.com/path/test.html",
1070     "Content-disposition: attachment;",
1071     "",
1072     L"",
1073     L"test.html"},
1074    {"http://www.google.com/",
1075     "",
1076     "",
1077     L"",
1078     L"www.google.com"},
1079    {"http://www.google.com/test.html",
1080     "",
1081     "",
1082     L"",
1083     L"test.html"},
1084    // Now that we use googleurl's ExtractFileName, this case falls back
1085    // to the hostname. If this behavior is not desirable, we'd better
1086    // change ExtractFileName (in url_parse).
1087    {"http://www.google.com/path/",
1088     "",
1089     "",
1090     L"",
1091     L"www.google.com"},
1092    {"http://www.google.com/path",
1093     "",
1094     "",
1095     L"",
1096     L"path"},
1097    {"file:///",
1098     "",
1099     "",
1100     L"",
1101     L"download"},
1102    {"non-standard-scheme:",
1103     "",
1104     "",
1105     L"",
1106     L"download"},
1107    {"http://www.google.com/",
1108     "Content-disposition: attachment; filename =\"test.html\"",
1109     "",
1110     L"download",
1111     L"test.html"},
1112    {"http://www.google.com/",
1113     "",
1114     "",
1115     L"download",
1116     L"download"},
1117    {"http://www.google.com/",
1118     "Content-disposition: attachment; filename=\"../test.html\"",
1119     "",
1120     L"",
1121     L"_test.html"},
1122    {"http://www.google.com/",
1123     "Content-disposition: attachment; filename=\"..\\test.html\"",
1124     "",
1125     L"",
1126     L"_test.html"},
1127    {"http://www.google.com/",
1128     "Content-disposition: attachment; filename=\"..\"",
1129     "",
1130     L"download",
1131     L"download"},
1132    {"http://www.google.com/test.html",
1133     "Content-disposition: attachment; filename=\"..\"",
1134     "",
1135     L"download",
1136     L"test.html"},
1137    // Below is a small subset of cases taken from GetFileNameFromCD test above.
1138    {"http://www.google.com/",
1139     "Content-Disposition: attachment; filename=\"%EC%98%88%EC%88%A0%20"
1140     "%EC%98%88%EC%88%A0.jpg\"",
1141     "",
1142     L"",
1143     L"\uc608\uc220 \uc608\uc220.jpg"},
1144    {"http://www.google.com/%EC%98%88%EC%88%A0%20%EC%98%88%EC%88%A0.jpg",
1145     "",
1146     "",
1147     L"download",
1148     L"\uc608\uc220 \uc608\uc220.jpg"},
1149    {"http://www.google.com/",
1150     "Content-disposition: attachment;",
1151     "",
1152     L"\uB2E4\uC6B4\uB85C\uB4DC",
1153     L"\uB2E4\uC6B4\uB85C\uB4DC"},
1154    {"http://www.google.com/",
1155     "Content-Disposition: attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD="
1156     "D13=2Epng?=\"",
1157     "",
1158     L"download",
1159     L"\u82b8\u88533.png"},
1160    {"http://www.example.com/images?id=3",
1161     "Content-Disposition: attachment; filename=caf\xc3\xa9.png",
1162     "iso-8859-1",
1163     L"",
1164     L"caf\u00e9.png"},
1165    {"http://www.example.com/images?id=3",
1166     "Content-Disposition: attachment; filename=caf\xe5.png",
1167     "windows-1253",
1168     L"",
1169     L"caf\u03b5.png"},
1170    {"http://www.example.com/file?id=3",
1171     "Content-Disposition: attachment; name=\xcf\xc2\xd4\xd8.zip",
1172     "GBK",
1173     L"",
1174     L"\u4e0b\u8f7d.zip"},
1175    // Invalid C-D header. Extracts filename from url.
1176    {"http://www.google.com/test.html",
1177     "Content-Disposition: attachment; filename==?iiso88591?Q?caf=EG?=",
1178     "",
1179     L"",
1180     L"test.html"},
1181    // about: and data: URLs
1182    {"about:chrome",
1183     "",
1184     "",
1185     L"",
1186     L"download"},
1187    {"data:,looks/like/a.path",
1188     "",
1189     "",
1190     L"",
1191     L"download"},
1192    {"data:text/plain;base64,VG8gYmUgb3Igbm90IHRvIGJlLg=",
1193     "",
1194     "",
1195     L"",
1196     L"download"},
1197    {"data:,looks/like/a.path",
1198     "",
1199     "",
1200     L"default_filename_is_given",
1201     L"default_filename_is_given"},
1202    {"data:,looks/like/a.path",
1203     "",
1204     "",
1205     L"\u65e5\u672c\u8a9e",  // Japanese Kanji.
1206     L"\u65e5\u672c\u8a9e"},
1207    // Dotfiles. Ensures preceeding period(s) stripped.
1208    {"http://www.google.com/.test.html",
1209    "",
1210    "",
1211    L"",
1212    L"test.html"},
1213    {"http://www.google.com/.test",
1214    "",
1215    "",
1216    L"",
1217    L"test"},
1218    {"http://www.google.com/..test",
1219    "",
1220    "",
1221    L"",
1222    L"test"},
1223    // The filename encoding is specified by the referrer charset.
1224    {"http://example.com/V%FDvojov%E1%20psychologie.doc",
1225     "",
1226     "iso-8859-1",
1227     L"",
1228     L"V\u00fdvojov\u00e1 psychologie.doc"},
1229    // The filename encoding doesn't match the referrer charset, the
1230    // system charset, or UTF-8.
1231    // TODO(jshin): we need to handle this case.
1232#if 0
1233    {"http://example.com/V%FDvojov%E1%20psychologie.doc",
1234     "",
1235     "utf-8",
1236     L"",
1237     L"V\u00fdvojov\u00e1 psychologie.doc",
1238    },
1239#endif
1240  };
1241  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
1242    std::wstring default_name = test_cases[i].default_filename;
1243    string16 filename = GetSuggestedFilename(
1244        GURL(test_cases[i].url), test_cases[i].content_disp_header,
1245        test_cases[i].referrer_charset, WideToUTF16(default_name));
1246    EXPECT_EQ(std::wstring(test_cases[i].expected_filename),
1247              UTF16ToWide(filename))
1248      << "Iteration " << i << ": " << test_cases[i].url;
1249  }
1250}
1251
// This is currently a Windows-specific function.
1253#if defined(OS_WIN)
namespace {

// One row of the GetDirectoryListingEntry test table. The first five fields
// are forwarded, in this order, as the arguments of
// GetDirectoryListingEntry(); |expected| is the string the call must return.
// Field order matters: the table rows are brace-initialized positionally.
struct GetDirectoryListingEntryCase {
  const wchar_t* name;     // Entry name; converted with WideToUTF16().
  const char* raw_bytes;   // Raw name bytes; empty in several test rows.
  bool is_dir;             // Passed through as the is_dir argument.
  int64 filesize;          // Passed through as the size argument.
  base::Time time;         // Passed through as the time argument.
  const char* expected;    // Exact expected return value.
};

}  // namespace
1266TEST(NetUtilTest, GetDirectoryListingEntry) {
1267  const GetDirectoryListingEntryCase test_cases[] = {
1268    {L"Foo",
1269     "",
1270     false,
1271     10000,
1272     base::Time(),
1273     "<script>addRow(\"Foo\",\"Foo\",0,\"9.8 kB\",\"\");</script>\n"},
1274    {L"quo\"tes",
1275     "",
1276     false,
1277     10000,
1278     base::Time(),
1279     "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>"
1280         "\n"},
1281    {L"quo\"tes",
1282     "quo\"tes",
1283     false,
1284     10000,
1285     base::Time(),
1286     "<script>addRow(\"quo\\\"tes\",\"quo%22tes\",0,\"9.8 kB\",\"\");</script>"
1287         "\n"},
1288    // U+D55C0 U+AE00. raw_bytes is empty (either a local file with
1289    // UTF-8/UTF-16 encoding or a remote file on an ftp server using UTF-8
1290    {L"\xD55C\xAE00.txt",
1291     "",
1292     false,
1293     10000,
1294     base::Time(),
1295     "<script>addRow(\"\\uD55C\\uAE00.txt\",\"%ED%95%9C%EA%B8%80.txt\""
1296         ",0,\"9.8 kB\",\"\");</script>\n"},
1297    // U+D55C0 U+AE00. raw_bytes is the corresponding EUC-KR sequence:
1298    // a local or remote file in EUC-KR.
1299    {L"\xD55C\xAE00.txt",
1300     "\xC7\xD1\xB1\xDB.txt",
1301     false,
1302     10000,
1303     base::Time(),
1304     "<script>addRow(\"\\uD55C\\uAE00.txt\",\"%C7%D1%B1%DB.txt\""
1305         ",0,\"9.8 kB\",\"\");</script>\n"},
1306  };
1307
1308  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
1309    const std::string results = GetDirectoryListingEntry(
1310        WideToUTF16(test_cases[i].name),
1311        test_cases[i].raw_bytes,
1312        test_cases[i].is_dir,
1313        test_cases[i].filesize,
1314        test_cases[i].time);
1315    EXPECT_EQ(test_cases[i].expected, results);
1316  }
1317}
1318
1319#endif
1320
1321TEST(NetUtilTest, ParseHostAndPort) {
1322  const struct {
1323    const char* input;
1324    bool success;
1325    const char* expected_host;
1326    int expected_port;
1327  } tests[] = {
1328    // Valid inputs:
1329    {"foo:10", true, "foo", 10},
1330    {"foo", true, "foo", -1},
1331    {
1332      "[1080:0:0:0:8:800:200C:4171]:11",
1333      true,
1334      "[1080:0:0:0:8:800:200C:4171]",
1335      11,
1336    },
1337    // Invalid inputs:
1338    {"foo:bar", false, "", -1},
1339    {"foo:", false, "", -1},
1340    {":", false, "", -1},
1341    {":80", false, "", -1},
1342    {"", false, "", -1},
1343    {"porttoolong:300000", false, "", -1},
1344    {"usrname@host", false, "", -1},
1345    {"usrname:password@host", false, "", -1},
1346    {":password@host", false, "", -1},
1347    {":password@host:80", false, "", -1},
1348    {":password@host", false, "", -1},
1349    {"@host", false, "", -1},
1350  };
1351
1352  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
1353    std::string host;
1354    int port;
1355    bool ok = ParseHostAndPort(tests[i].input, &host, &port);
1356
1357    EXPECT_EQ(tests[i].success, ok);
1358
1359    if (tests[i].success) {
1360      EXPECT_EQ(tests[i].expected_host, host);
1361      EXPECT_EQ(tests[i].expected_port, port);
1362    }
1363  }
1364}
1365
1366TEST(NetUtilTest, GetHostAndPort) {
1367  const struct {
1368    GURL url;
1369    const char* expected_host_and_port;
1370  } tests[] = {
1371    { GURL("http://www.foo.com/x"), "www.foo.com:80"},
1372    { GURL("http://www.foo.com:21/x"), "www.foo.com:21"},
1373
1374    // For IPv6 literals should always include the brackets.
1375    { GURL("http://[1::2]/x"), "[1::2]:80"},
1376    { GURL("http://[::a]:33/x"), "[::a]:33"},
1377  };
1378  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
1379    std::string host_and_port = GetHostAndPort(tests[i].url);
1380    EXPECT_EQ(std::string(tests[i].expected_host_and_port), host_and_port);
1381  }
1382}
1383
1384TEST(NetUtilTest, GetHostAndOptionalPort) {
1385  const struct {
1386    GURL url;
1387    const char* expected_host_and_port;
1388  } tests[] = {
1389    { GURL("http://www.foo.com/x"), "www.foo.com"},
1390    { GURL("http://www.foo.com:21/x"), "www.foo.com:21"},
1391
1392    // For IPv6 literals should always include the brackets.
1393    { GURL("http://[1::2]/x"), "[1::2]"},
1394    { GURL("http://[::a]:33/x"), "[::a]:33"},
1395  };
1396  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
1397    std::string host_and_port = GetHostAndOptionalPort(tests[i].url);
1398    EXPECT_EQ(std::string(tests[i].expected_host_and_port), host_and_port);
1399  }
1400}
1401
1402
1403TEST(NetUtilTest, NetAddressToString_IPv4) {
1404  const struct {
1405    uint8 addr[4];
1406    const char* result;
1407  } tests[] = {
1408    {{0, 0, 0, 0}, "0.0.0.0"},
1409    {{127, 0, 0, 1}, "127.0.0.1"},
1410    {{192, 168, 0, 1}, "192.168.0.1"},
1411  };
1412
1413  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
1414    const addrinfo* ai = GetIPv4Address(tests[i].addr, 80);
1415    std::string result = NetAddressToString(ai);
1416    EXPECT_EQ(std::string(tests[i].result), result);
1417  }
1418}
1419
1420TEST(NetUtilTest, NetAddressToString_IPv6) {
1421  const struct {
1422    uint8 addr[16];
1423    const char* result;
1424  } tests[] = {
1425    {{0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10, 0xFE, 0xDC, 0xBA,
1426      0x98, 0x76, 0x54, 0x32, 0x10},
1427     "fedc:ba98:7654:3210:fedc:ba98:7654:3210"},
1428  };
1429
1430  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
1431    const addrinfo* ai = GetIPv6Address(tests[i].addr, 80);
1432    std::string result = NetAddressToString(ai);
1433    // Allow NetAddressToString() to fail, in case the system doesn't
1434    // support IPv6.
1435    if (!result.empty())
1436      EXPECT_EQ(std::string(tests[i].result), result);
1437  }
1438}
1439
1440TEST(NetUtilTest, NetAddressToStringWithPort_IPv4) {
1441  uint8 addr[] = {127, 0, 0, 1};
1442  const addrinfo* ai = GetIPv4Address(addr, 166);
1443  std::string result = NetAddressToStringWithPort(ai);
1444  EXPECT_EQ("127.0.0.1:166", result);
1445}
1446
1447TEST(NetUtilTest, NetAddressToStringWithPort_IPv6) {
1448  uint8 addr[] = {
1449      0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10, 0xFE, 0xDC, 0xBA,
1450      0x98, 0x76, 0x54, 0x32, 0x10
1451  };
1452  const addrinfo* ai = GetIPv6Address(addr, 361);
1453  std::string result = NetAddressToStringWithPort(ai);
1454
1455  // May fail on systems that don't support IPv6.
1456  if (!result.empty())
1457    EXPECT_EQ("[fedc:ba98:7654:3210:fedc:ba98:7654:3210]:361", result);
1458}
1459
1460TEST(NetUtilTest, GetHostName) {
1461  // We can't check the result of GetHostName() directly, since the result
1462  // will differ across machines. Our goal here is to simply exercise the
1463  // code path, and check that things "look about right".
1464  std::string hostname = GetHostName();
1465  EXPECT_FALSE(hostname.empty());
1466}
1467
1468TEST(NetUtilTest, FormatUrl) {
1469  FormatUrlTypes default_format_type = kFormatUrlOmitUsernamePassword;
1470  const UrlTestData tests[] = {
1471    {"Empty URL", "", "", default_format_type, UnescapeRule::NORMAL, L"", 0},
1472
1473    {"Simple URL",
1474     "http://www.google.com/", "", default_format_type, UnescapeRule::NORMAL,
1475     L"http://www.google.com/", 7},
1476
1477    {"With a port number and a reference",
1478     "http://www.google.com:8080/#\xE3\x82\xB0", "", default_format_type,
1479     UnescapeRule::NORMAL,
1480     L"http://www.google.com:8080/#\x30B0", 7},
1481
1482    // -------- IDN tests --------
1483    {"Japanese IDN with ja",
1484     "http://xn--l8jvb1ey91xtjb.jp", "ja", default_format_type,
1485     UnescapeRule::NORMAL, L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
1486
1487    {"Japanese IDN with en",
1488     "http://xn--l8jvb1ey91xtjb.jp", "en", default_format_type,
1489     UnescapeRule::NORMAL, L"http://xn--l8jvb1ey91xtjb.jp/", 7},
1490
1491    {"Japanese IDN without any languages",
1492     "http://xn--l8jvb1ey91xtjb.jp", "", default_format_type,
1493     UnescapeRule::NORMAL,
1494     // Single script is safe for empty languages.
1495     L"http://\x671d\x65e5\x3042\x3055\x3072.jp/", 7},
1496
1497    {"mailto: with Japanese IDN",
1498     "mailto:foo@xn--l8jvb1ey91xtjb.jp", "ja", default_format_type,
1499     UnescapeRule::NORMAL,
1500     // GURL doesn't assume an email address's domain part as a host name.
1501     L"mailto:foo@xn--l8jvb1ey91xtjb.jp", 7},
1502
1503    {"file: with Japanese IDN",
1504     "file://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type,
1505     UnescapeRule::NORMAL,
1506     L"file://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 7},
1507
1508    {"ftp: with Japanese IDN",
1509     "ftp://xn--l8jvb1ey91xtjb.jp/config.sys", "ja", default_format_type,
1510     UnescapeRule::NORMAL,
1511     L"ftp://\x671d\x65e5\x3042\x3055\x3072.jp/config.sys", 6},
1512
1513    // -------- omit_username_password flag tests --------
1514    {"With username and password, omit_username_password=false",
1515     "http://user:passwd@example.com/foo", "",
1516     kFormatUrlOmitNothing, UnescapeRule::NORMAL,
1517     L"http://user:passwd@example.com/foo", 19},
1518
1519    {"With username and password, omit_username_password=true",
1520     "http://user:passwd@example.com/foo", "", default_format_type,
1521     UnescapeRule::NORMAL, L"http://example.com/foo", 7},
1522
1523    {"With username and no password",
1524     "http://user@example.com/foo", "", default_format_type,
1525     UnescapeRule::NORMAL, L"http://example.com/foo", 7},
1526
1527    {"Just '@' without username and password",
1528     "http://@example.com/foo", "", default_format_type, UnescapeRule::NORMAL,
1529     L"http://example.com/foo", 7},
1530
1531    // GURL doesn't think local-part of an email address is username for URL.
1532    {"mailto:, omit_username_password=true",
1533     "mailto:foo@example.com", "", default_format_type, UnescapeRule::NORMAL,
1534     L"mailto:foo@example.com", 7},
1535
1536    // -------- unescape flag tests --------
1537    {"Do not unescape",
1538     "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
1539     "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
1540     "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type,
1541     UnescapeRule::NONE,
1542     // GURL parses %-encoded hostnames into Punycode.
1543     L"http://xn--qcka1pmc.jp/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
1544     L"?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", 7},
1545
1546    {"Unescape normally",
1547     "http://%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB.jp/"
1548     "%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"
1549     "?q=%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB", "en", default_format_type,
1550     UnescapeRule::NORMAL,
1551     L"http://xn--qcka1pmc.jp/\x30B0\x30FC\x30B0\x30EB"
1552     L"?q=\x30B0\x30FC\x30B0\x30EB", 7},
1553
1554    {"Unescape normally including unescape spaces",
1555     "http://www.google.com/search?q=Hello%20World", "en", default_format_type,
1556     UnescapeRule::SPACES, L"http://www.google.com/search?q=Hello World", 7},
1557
1558    /*
1559    {"unescape=true with some special characters",
1560    "http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", "",
1561    kFormatUrlOmitNothing, UnescapeRule::NORMAL,
1562    L"http://user%3A:%40passwd@example.com/foo%3Fbar?q=b%26z", 25},
1563    */
1564    // Disabled: the resultant URL becomes "...user%253A:%2540passwd...".
1565
1566    // -------- omit http: --------
1567    {"omit http with user name",
1568     "http://user@example.com/foo", "", kFormatUrlOmitAll,
1569     UnescapeRule::NORMAL, L"example.com/foo", 0},
1570
1571    {"omit http",
1572     "http://www.google.com/", "en", kFormatUrlOmitHTTP,
1573     UnescapeRule::NORMAL, L"www.google.com/",
1574     0},
1575
1576    {"omit http with https",
1577     "https://www.google.com/", "en", kFormatUrlOmitHTTP,
1578     UnescapeRule::NORMAL, L"https://www.google.com/",
1579     8},
1580
1581    {"omit http starts with ftp.",
1582     "http://ftp.google.com/", "en", kFormatUrlOmitHTTP,
1583     UnescapeRule::NORMAL, L"http://ftp.google.com/",
1584     7},
1585
1586    // -------- omit trailing slash on bare hostname --------
1587    {"omit slash when it's the entire path",
1588     "http://www.google.com/", "en",
1589     kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
1590     L"http://www.google.com", 7},
1591    {"omit slash when there's a ref",
1592     "http://www.google.com/#ref", "en",
1593     kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
1594     L"http://www.google.com/#ref", 7},
1595    {"omit slash when there's a query",
1596     "http://www.google.com/?", "en",
1597     kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
1598     L"http://www.google.com/?", 7},
1599    {"omit slash when it's not the entire path",
1600     "http://www.google.com/foo", "en",
1601     kFormatUrlOmitTrailingSlashOnBareHostname, UnescapeRule::NORMAL,
1602     L"http://www.google.com/foo", 7},
1603    {"omit slash for nonstandard URLs",
1604     "data:/", "en", kFormatUrlOmitTrailingSlashOnBareHostname,
1605     UnescapeRule::NORMAL, L"data:/", 5},
1606    {"omit slash for file URLs",
1607     "file:///", "en", kFormatUrlOmitTrailingSlashOnBareHostname,
1608     UnescapeRule::NORMAL, L"file:///", 7},
1609
1610    // -------- view-source: --------
1611    {"view-source",
1612     "view-source:http://xn--qcka1pmc.jp/", "ja", default_format_type,
1613     UnescapeRule::NORMAL, L"view-source:http://\x30B0\x30FC\x30B0\x30EB.jp/",
1614     19},
1615
1616    {"view-source of view-source",
1617     "view-source:view-source:http://xn--qcka1pmc.jp/", "ja",
1618     default_format_type, UnescapeRule::NORMAL,
1619     L"view-source:view-source:http://xn--qcka1pmc.jp/", 12},
1620
1621    // view-source should omit http and trailing slash where non-view-source
1622    // would.
1623    {"view-source omit http",
1624     "view-source:http://a.b/c", "en", kFormatUrlOmitAll,
1625     UnescapeRule::NORMAL, L"view-source:a.b/c",
1626     12},
1627    {"view-source omit http starts with ftp.",
1628     "view-source:http://ftp.b/c", "en", kFormatUrlOmitAll,
1629     UnescapeRule::NORMAL, L"view-source:http://ftp.b/c",
1630     19},
1631    {"view-source omit slash when it's the entire path",
1632     "view-source:http://a.b/", "en", kFormatUrlOmitAll,
1633     UnescapeRule::NORMAL, L"view-source:a.b",
1634     12},
1635  };
1636
1637  for (size_t i = 0; i < arraysize(tests); ++i) {
1638    size_t prefix_len;
1639    string16 formatted = FormatUrl(
1640        GURL(tests[i].input), tests[i].languages, tests[i].format_types,
1641        tests[i].escape_rules, NULL, &prefix_len, NULL);
1642    EXPECT_EQ(WideToUTF16(tests[i].output), formatted) << tests[i].description;
1643    EXPECT_EQ(tests[i].prefix_len, prefix_len) << tests[i].description;
1644  }
1645}
1646
1647TEST(NetUtilTest, FormatUrlParsed) {
1648  // No unescape case.
1649  url_parse::Parsed parsed;
1650  string16 formatted = FormatUrl(
1651      GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
1652           "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
1653      "ja", kFormatUrlOmitNothing, UnescapeRule::NONE, &parsed, NULL,
1654      NULL);
1655  EXPECT_EQ(WideToUTF16(
1656      L"http://%E3%82%B0:%E3%83%BC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
1657      L"/%E3%82%B0/?q=%E3%82%B0#\x30B0"), formatted);
1658  EXPECT_EQ(WideToUTF16(L"%E3%82%B0"),
1659      formatted.substr(parsed.username.begin, parsed.username.len));
1660  EXPECT_EQ(WideToUTF16(L"%E3%83%BC"),
1661      formatted.substr(parsed.password.begin, parsed.password.len));
1662  EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
1663      formatted.substr(parsed.host.begin, parsed.host.len));
1664  EXPECT_EQ(WideToUTF16(L"8080"),
1665      formatted.substr(parsed.port.begin, parsed.port.len));
1666  EXPECT_EQ(WideToUTF16(L"/%E3%82%B0/"),
1667      formatted.substr(parsed.path.begin, parsed.path.len));
1668  EXPECT_EQ(WideToUTF16(L"q=%E3%82%B0"),
1669      formatted.substr(parsed.query.begin, parsed.query.len));
1670  EXPECT_EQ(WideToUTF16(L"\x30B0"),
1671      formatted.substr(parsed.ref.begin, parsed.ref.len));
1672
1673  // Unescape case.
1674  formatted = FormatUrl(
1675      GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
1676           "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
1677      "ja", kFormatUrlOmitNothing, UnescapeRule::NORMAL, &parsed, NULL,
1678      NULL);
1679  EXPECT_EQ(WideToUTF16(L"http://\x30B0:\x30FC@\x30B0\x30FC\x30B0\x30EB.jp:8080"
1680      L"/\x30B0/?q=\x30B0#\x30B0"), formatted);
1681  EXPECT_EQ(WideToUTF16(L"\x30B0"),
1682      formatted.substr(parsed.username.begin, parsed.username.len));
1683  EXPECT_EQ(WideToUTF16(L"\x30FC"),
1684      formatted.substr(parsed.password.begin, parsed.password.len));
1685  EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
1686      formatted.substr(parsed.host.begin, parsed.host.len));
1687  EXPECT_EQ(WideToUTF16(L"8080"),
1688      formatted.substr(parsed.port.begin, parsed.port.len));
1689  EXPECT_EQ(WideToUTF16(L"/\x30B0/"),
1690      formatted.substr(parsed.path.begin, parsed.path.len));
1691  EXPECT_EQ(WideToUTF16(L"q=\x30B0"),
1692      formatted.substr(parsed.query.begin, parsed.query.len));
1693  EXPECT_EQ(WideToUTF16(L"\x30B0"),
1694      formatted.substr(parsed.ref.begin, parsed.ref.len));
1695
1696  // Omit_username_password + unescape case.
1697  formatted = FormatUrl(
1698      GURL("http://\xE3\x82\xB0:\xE3\x83\xBC@xn--qcka1pmc.jp:8080/"
1699           "%E3%82%B0/?q=%E3%82%B0#\xE3\x82\xB0"),
1700      "ja", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, &parsed,
1701      NULL, NULL);
1702  EXPECT_EQ(WideToUTF16(L"http://\x30B0\x30FC\x30B0\x30EB.jp:8080"
1703      L"/\x30B0/?q=\x30B0#\x30B0"), formatted);
1704  EXPECT_FALSE(parsed.username.is_valid());
1705  EXPECT_FALSE(parsed.password.is_valid());
1706  EXPECT_EQ(WideToUTF16(L"\x30B0\x30FC\x30B0\x30EB.jp"),
1707      formatted.substr(parsed.host.begin, parsed.host.len));
1708  EXPECT_EQ(WideToUTF16(L"8080"),
1709      formatted.substr(parsed.port.begin, parsed.port.len));
1710  EXPECT_EQ(WideToUTF16(L"/\x30B0/"),
1711      formatted.substr(parsed.path.begin, parsed.path.len));
1712  EXPECT_EQ(WideToUTF16(L"q=\x30B0"),
1713      formatted.substr(parsed.query.begin, parsed.query.len));
1714  EXPECT_EQ(WideToUTF16(L"\x30B0"),
1715      formatted.substr(parsed.ref.begin, parsed.ref.len));
1716
1717  // View-source case.
1718  formatted = FormatUrl(
1719      GURL("view-source:http://user:passwd@host:81/path?query#ref"),
1720      "", kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, &parsed,
1721      NULL, NULL);
1722  EXPECT_EQ(WideToUTF16(L"view-source:http://host:81/path?query#ref"),
1723      formatted);
1724  EXPECT_EQ(WideToUTF16(L"view-source:http"),
1725      formatted.substr(parsed.scheme.begin, parsed.scheme.len));
1726  EXPECT_FALSE(parsed.username.is_valid());
1727  EXPECT_FALSE(parsed.password.is_valid());
1728  EXPECT_EQ(WideToUTF16(L"host"),
1729      formatted.substr(parsed.host.begin, parsed.host.len));
1730  EXPECT_EQ(WideToUTF16(L"81"),
1731      formatted.substr(parsed.port.begin, parsed.port.len));
1732  EXPECT_EQ(WideToUTF16(L"/path"),
1733      formatted.substr(parsed.path.begin, parsed.path.len));
1734  EXPECT_EQ(WideToUTF16(L"query"),
1735      formatted.substr(parsed.query.begin, parsed.query.len));
1736  EXPECT_EQ(WideToUTF16(L"ref"),
1737      formatted.substr(parsed.ref.begin, parsed.ref.len));
1738
1739  // omit http case.
1740  formatted = FormatUrl(
1741      GURL("http://host:8000/a?b=c#d"),
1742      "", kFormatUrlOmitHTTP, UnescapeRule::NORMAL, &parsed, NULL, NULL);
1743  EXPECT_EQ(WideToUTF16(L"host:8000/a?b=c#d"), formatted);
1744  EXPECT_FALSE(parsed.scheme.is_valid());
1745  EXPECT_FALSE(parsed.username.is_valid());
1746  EXPECT_FALSE(parsed.password.is_valid());
1747  EXPECT_EQ(WideToUTF16(L"host"),
1748      formatted.substr(parsed.host.begin, parsed.host.len));
1749  EXPECT_EQ(WideToUTF16(L"8000"),
1750      formatted.substr(parsed.port.begin, parsed.port.len));
1751  EXPECT_EQ(WideToUTF16(L"/a"),
1752      formatted.substr(parsed.path.begin, parsed.path.len));
1753  EXPECT_EQ(WideToUTF16(L"b=c"),
1754      formatted.substr(parsed.query.begin, parsed.query.len));
1755  EXPECT_EQ(WideToUTF16(L"d"),
1756      formatted.substr(parsed.ref.begin, parsed.ref.len));
1757
1758  // omit http starts with ftp case.
1759  formatted = FormatUrl(
1760      GURL("http://ftp.host:8000/a?b=c#d"),
1761      "", kFormatUrlOmitHTTP, UnescapeRule::NORMAL, &parsed, NULL, NULL);
1762  EXPECT_EQ(WideToUTF16(L"http://ftp.host:8000/a?b=c#d"), formatted);
1763  EXPECT_TRUE(parsed.scheme.is_valid());
1764  EXPECT_FALSE(parsed.username.is_valid());
1765  EXPECT_FALSE(parsed.password.is_valid());
1766  EXPECT_EQ(WideToUTF16(L"http"),
1767      formatted.substr(parsed.scheme.begin, parsed.scheme.len));
1768  EXPECT_EQ(WideToUTF16(L"ftp.host"),
1769      formatted.substr(parsed.host.begin, parsed.host.len));
1770  EXPECT_EQ(WideToUTF16(L"8000"),
1771      formatted.substr(parsed.port.begin, parsed.port.len));
1772  EXPECT_EQ(WideToUTF16(L"/a"),
1773      formatted.substr(parsed.path.begin, parsed.path.len));
1774  EXPECT_EQ(WideToUTF16(L"b=c"),
1775      formatted.substr(parsed.query.begin, parsed.query.len));
1776  EXPECT_EQ(WideToUTF16(L"d"),
1777      formatted.substr(parsed.ref.begin, parsed.ref.len));
1778
1779  // omit http starts with 'f' case.
1780  formatted = FormatUrl(
1781      GURL("http://f/"),
1782      "", kFormatUrlOmitHTTP, UnescapeRule::NORMAL, &parsed, NULL, NULL);
1783  EXPECT_EQ(WideToUTF16(L"f/"), formatted);
1784  EXPECT_FALSE(parsed.scheme.is_valid());
1785  EXPECT_FALSE(parsed.username.is_valid());
1786  EXPECT_FALSE(parsed.password.is_valid());
1787  EXPECT_FALSE(parsed.port.is_valid());
1788  EXPECT_TRUE(parsed.path.is_valid());
1789  EXPECT_FALSE(parsed.query.is_valid());
1790  EXPECT_FALSE(parsed.ref.is_valid());
1791  EXPECT_EQ(WideToUTF16(L"f"),
1792      formatted.substr(parsed.host.begin, parsed.host.len));
1793  EXPECT_EQ(WideToUTF16(L"/"),
1794      formatted.substr(parsed.path.begin, parsed.path.len));
1795}
1796
1797TEST(NetUtilTest, FormatUrlAdjustOffset) {
1798  const AdjustOffsetCase basic_cases[] = {
1799    {0, 0},
1800    {3, 3},
1801    {5, 5},
1802    {6, 6},
1803    {13, 13},
1804    {21, 21},
1805    {22, 22},
1806    {23, 23},
1807    {25, 25},
1808    {26, string16::npos},
1809    {500000, string16::npos},
1810    {string16::npos, string16::npos},
1811  };
1812  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(basic_cases); ++i) {
1813    size_t offset = basic_cases[i].input_offset;
1814    FormatUrl(GURL("http://www.google.com/foo/"), "en",
1815                   kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
1816                   NULL, NULL, &offset);
1817    EXPECT_EQ(basic_cases[i].output_offset, offset);
1818  }
1819
1820  size_t url_size = 26;
1821  std::vector<size_t> offsets;
1822  for (size_t i = 0; i < url_size + 1; ++i)
1823    offsets.push_back(i);
1824  FormatUrlWithOffsets(GURL("http://www.google.com/foo/"), "en",
1825                       kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
1826                       NULL, NULL, &offsets);
1827  for (size_t i = 0; i < url_size; ++i)
1828    EXPECT_EQ(i, offsets[i]);
1829  EXPECT_EQ(kNpos, offsets[url_size]);
1830
1831  const struct {
1832    const char* input_url;
1833    size_t input_offset;
1834    size_t output_offset;
1835  } omit_auth_cases[] = {
1836    {"http://foo:bar@www.google.com/", 6, 6},
1837    {"http://foo:bar@www.google.com/", 7, string16::npos},
1838    {"http://foo:bar@www.google.com/", 8, string16::npos},
1839    {"http://foo:bar@www.google.com/", 10, string16::npos},
1840    {"http://foo:bar@www.google.com/", 11, string16::npos},
1841    {"http://foo:bar@www.google.com/", 14, string16::npos},
1842    {"http://foo:bar@www.google.com/", 15, 7},
1843    {"http://foo:bar@www.google.com/", 25, 17},
1844    {"http://foo@www.google.com/", 9, string16::npos},
1845    {"http://foo@www.google.com/", 11, 7},
1846  };
1847  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(omit_auth_cases); ++i) {
1848    size_t offset = omit_auth_cases[i].input_offset;
1849    FormatUrl(GURL(omit_auth_cases[i].input_url), "en",
1850                   kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
1851                   NULL, NULL, &offset);
1852    EXPECT_EQ(omit_auth_cases[i].output_offset, offset);
1853  }
1854
1855  url_size = 30;
1856  offsets.clear();
1857  for (size_t i = 0; i < url_size; ++i)
1858    offsets.push_back(i);
1859  FormatUrlWithOffsets(GURL("http://foo:bar@www.google.com/"), "en",
1860                       kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
1861                       NULL, NULL, &offsets);
1862  for (size_t i = 0; i < 7; ++i)
1863    EXPECT_EQ(i, offsets[i]);
1864  for (size_t i = 7; i < 15; ++i)
1865    EXPECT_EQ(kNpos, offsets[i]);
1866  for (size_t i = 16; i < url_size; ++i)
1867    EXPECT_EQ(i - 8 , offsets[i]);
1868
1869  const AdjustOffsetCase view_source_cases[] = {
1870    {0, 0},
1871    {3, 3},
1872    {11, 11},
1873    {12, 12},
1874    {13, 13},
1875    {18, 18},
1876    {19, string16::npos},
1877    {20, string16::npos},
1878    {23, 19},
1879    {26, 22},
1880    {string16::npos, string16::npos},
1881  };
1882  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(view_source_cases); ++i) {
1883    size_t offset = view_source_cases[i].input_offset;
1884    FormatUrl(GURL("view-source:http://foo@www.google.com/"), "en",
1885                   kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
1886                   NULL, NULL, &offset);
1887    EXPECT_EQ(view_source_cases[i].output_offset, offset);
1888  }
1889
1890  url_size = 38;
1891  offsets.clear();
1892  for (size_t i = 0; i < url_size; ++i)
1893    offsets.push_back(i);
1894  FormatUrlWithOffsets(GURL("view-source:http://foo@www.google.com/"), "en",
1895                       kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
1896                       NULL, NULL, &offsets);
1897  size_t expected[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
1898                       17, 18, kNpos, kNpos, kNpos, kNpos, 19, 20, 21, 22, 23,
1899                       24, 25, 26, 27, 28, 29, 30, 31, 32, 33};
1900  ASSERT_EQ(url_size, arraysize(expected));
1901  for (size_t i = 0; i < url_size; ++i)
1902    EXPECT_EQ(expected[i], offsets[i]);
1903
1904  const AdjustOffsetCase idn_hostname_cases[] = {
1905    {8, string16::npos},
1906    {16, string16::npos},
1907    {24, string16::npos},
1908    {25, 12},
1909    {30, 17},
1910  };
1911  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(idn_hostname_cases); ++i) {
1912    size_t offset = idn_hostname_cases[i].input_offset;
1913    // "http://\x671d\x65e5\x3042\x3055\x3072.jp/foo/"
1914    FormatUrl(GURL("http://xn--l8jvb1ey91xtjb.jp/foo/"), "ja",
1915                   kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
1916                   NULL, NULL, &offset);
1917    EXPECT_EQ(idn_hostname_cases[i].output_offset, offset);
1918  }
1919
1920  url_size = 33;
1921  offsets.clear();
1922  for (size_t i = 0; i < url_size; ++i)
1923    offsets.push_back(i);
1924  FormatUrlWithOffsets(GURL("http://xn--l8jvb1ey91xtjb.jp/foo/"), "ja",
1925                       kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL,
1926                       NULL, NULL, &offsets);
1927  size_t expected_1[] = {0, 1, 2, 3, 4, 5, 6, 7, kNpos, kNpos, kNpos, kNpos,
1928                         kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
1929                         kNpos, kNpos, kNpos, kNpos, kNpos, 12, 13, 14, 15, 16,
1930                         17, 18, 19};
1931  ASSERT_EQ(url_size, arraysize(expected_1));
1932  for (size_t i = 0; i < url_size; ++i)
1933    EXPECT_EQ(expected_1[i], offsets[i]);
1934
1935  const AdjustOffsetCase unescape_cases[] = {
1936    {25, 25},
1937    {26, string16::npos},
1938    {27, string16::npos},
1939    {28, 26},
1940    {35, string16::npos},
1941    {41, 31},
1942    {59, 33},
1943    {60, string16::npos},
1944    {67, string16::npos},
1945    {68, string16::npos},
1946  };
1947  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(unescape_cases); ++i) {
1948    size_t offset = unescape_cases[i].input_offset;
1949    // "http://www.google.com/foo bar/\x30B0\x30FC\x30B0\x30EB"
1950    FormatUrl(GURL(
1951        "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"),
1952        "en", kFormatUrlOmitUsernamePassword, UnescapeRule::SPACES, NULL,
1953        NULL, &offset);
1954    EXPECT_EQ(unescape_cases[i].output_offset, offset);
1955  }
1956
1957  url_size = 68;
1958  offsets.clear();
1959  for (size_t i = 0; i < url_size; ++i)
1960    offsets.push_back(i);
1961  FormatUrlWithOffsets(GURL(
1962      "http://www.google.com/foo%20bar/%E3%82%B0%E3%83%BC%E3%82%B0%E3%83%AB"),
1963      "en", kFormatUrlOmitUsernamePassword, UnescapeRule::SPACES, NULL, NULL,
1964      &offsets);
1965  size_t expected_2[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
1966                         16, 17, 18, 19, 20, 21, 22, 23, 24, 25, kNpos, kNpos,
1967                         26, 27, 28, 29, 30, kNpos, kNpos, kNpos, kNpos, kNpos,
1968                         kNpos, kNpos, kNpos, 31, kNpos, kNpos, kNpos, kNpos,
1969                         kNpos, kNpos, kNpos, kNpos, 32, kNpos, kNpos, kNpos,
1970                         kNpos, kNpos, kNpos, kNpos, kNpos, 33, kNpos, kNpos,
1971                         kNpos, kNpos, kNpos, kNpos, kNpos, kNpos};
1972  ASSERT_EQ(url_size, arraysize(expected_2));
1973  for (size_t i = 0; i < url_size; ++i)
1974    EXPECT_EQ(expected_2[i], offsets[i]);
1975
1976  const AdjustOffsetCase ref_cases[] = {
1977    {30, 30},
1978    {31, 31},
1979    {32, string16::npos},
1980    {34, 32},
1981    {35, string16::npos},
1982    {37, 33},
1983    {38, string16::npos},
1984  };
1985  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(ref_cases); ++i) {
1986    size_t offset = ref_cases[i].input_offset;
1987    // "http://www.google.com/foo.html#\x30B0\x30B0z"
1988    FormatUrl(GURL(
1989        "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z"), "en",
1990        kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, NULL, NULL,
1991        &offset);
1992    EXPECT_EQ(ref_cases[i].output_offset, offset);
1993  }
1994
1995  url_size = 38;
1996  offsets.clear();
1997  for (size_t i = 0; i < url_size; ++i)
1998    offsets.push_back(i);
1999  // "http://www.google.com/foo.html#\x30B0\x30B0z"
2000  FormatUrlWithOffsets(GURL(
2001      "http://www.google.com/foo.html#\xE3\x82\xB0\xE3\x82\xB0z"), "en",
2002      kFormatUrlOmitUsernamePassword, UnescapeRule::NORMAL, NULL, NULL,
2003      &offsets);
2004  size_t expected_3[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2005                         16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
2006                         30, 31, kNpos, kNpos, 32, kNpos, kNpos, 33};
2007  ASSERT_EQ(url_size, arraysize(expected_3));
2008  for (size_t i = 0; i < url_size; ++i)
2009    EXPECT_EQ(expected_3[i], offsets[i]);
2010
2011  const AdjustOffsetCase omit_http_cases[] = {
2012    {0, string16::npos},
2013    {3, string16::npos},
2014    {7, 0},
2015    {8, 1},
2016  };
2017  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(omit_http_cases); ++i) {
2018    size_t offset = omit_http_cases[i].input_offset;
2019    FormatUrl(GURL("http://www.google.com"), "en",
2020        kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offset);
2021    EXPECT_EQ(omit_http_cases[i].output_offset, offset);
2022  }
2023
2024  url_size = 23;
2025  offsets.clear();
2026  for (size_t i = 0; i < url_size; ++i)
2027    offsets.push_back(i);
2028  FormatUrlWithOffsets(GURL("http://www.google.com"), "en",
2029      kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offsets);
2030  size_t expected_4[] = {kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 0, 1,
2031                         2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, kNpos};
2032  ASSERT_EQ(url_size, arraysize(expected_4));
2033  for (size_t i = 0; i < url_size; ++i)
2034    EXPECT_EQ(expected_4[i], offsets[i]);
2035
2036  const AdjustOffsetCase omit_http_start_with_ftp[] = {
2037    {0, 0},
2038    {3, 3},
2039    {8, 8},
2040  };
2041  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(omit_http_start_with_ftp); ++i) {
2042    size_t offset = omit_http_start_with_ftp[i].input_offset;
2043    FormatUrl(GURL("http://ftp.google.com"), "en",
2044        kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offset);
2045    EXPECT_EQ(omit_http_start_with_ftp[i].output_offset, offset);
2046  }
2047
2048  url_size = 23;
2049  offsets.clear();
2050  for (size_t i = 0; i < url_size; ++i)
2051    offsets.push_back(i);
2052  FormatUrlWithOffsets(GURL("http://ftp.google.com"), "en",
2053      kFormatUrlOmitHTTP, UnescapeRule::NORMAL, NULL, NULL, &offsets);
2054  size_t expected_5[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2055                         16, 17, 18, 19, 20, 21, kNpos};
2056  ASSERT_EQ(url_size, arraysize(expected_5));
2057  for (size_t i = 0; i < url_size; ++i)
2058    EXPECT_EQ(expected_5[i], offsets[i]);
2059
2060  const AdjustOffsetCase omit_all_cases[] = {
2061    {12, 0},
2062    {13, 1},
2063    {0, string16::npos},
2064    {3, string16::npos},
2065  };
2066  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(omit_all_cases); ++i) {
2067    size_t offset = omit_all_cases[i].input_offset;
2068    FormatUrl(GURL("http://user@foo.com/"), "en", kFormatUrlOmitAll,
2069                   UnescapeRule::NORMAL, NULL, NULL, &offset);
2070    EXPECT_EQ(omit_all_cases[i].output_offset, offset);
2071  }
2072
2073  url_size = 21;
2074  offsets.clear();
2075  for (size_t i = 0; i < url_size; ++i)
2076    offsets.push_back(i);
2077  FormatUrlWithOffsets(GURL("http://user@foo.com/"), "en", kFormatUrlOmitAll,
2078                       UnescapeRule::NORMAL, NULL, NULL, &offsets);
2079  size_t expected_6[] = {kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, kNpos,
2080                         kNpos, kNpos, kNpos, kNpos, 0, 1, 2, 3, 4, 5, 6, 7,
2081                         kNpos};
2082  ASSERT_EQ(url_size, arraysize(expected_6));
2083  for (size_t i = 0; i < url_size; ++i)
2084    EXPECT_EQ(expected_6[i], offsets[i]);
2085}
2086
2087TEST(NetUtilTest, SimplifyUrlForRequest) {
2088  struct {
2089    const char* input_url;
2090    const char* expected_simplified_url;
2091  } tests[] = {
2092    {
2093      // Reference section should be stripped.
2094      "http://www.google.com:78/foobar?query=1#hash",
2095      "http://www.google.com:78/foobar?query=1",
2096    },
2097    {
2098      // Reference section can itself contain #.
2099      "http://192.168.0.1?query=1#hash#10#11#13#14",
2100      "http://192.168.0.1?query=1",
2101    },
2102    { // Strip username/password.
2103      "http://user:pass@google.com",
2104      "http://google.com/",
2105    },
2106    { // Strip both the reference and the username/password.
2107      "http://user:pass@google.com:80/sup?yo#X#X",
2108      "http://google.com/sup?yo",
2109    },
2110    { // Try an HTTPS URL -- strip both the reference and the username/password.
2111      "https://user:pass@google.com:80/sup?yo#X#X",
2112      "https://google.com:80/sup?yo",
2113    },
2114    { // Try an FTP URL -- strip both the reference and the username/password.
2115      "ftp://user:pass@google.com:80/sup?yo#X#X",
2116      "ftp://google.com:80/sup?yo",
2117    },
2118    { // Try an nonstandard URL
2119      "foobar://user:pass@google.com:80/sup?yo#X#X",
2120      "foobar://user:pass@google.com:80/sup?yo#X#X",
2121    },
2122  };
2123  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
2124    SCOPED_TRACE(base::StringPrintf("Test[%" PRIuS "]: %s", i,
2125                                    tests[i].input_url));
2126    GURL input_url(GURL(tests[i].input_url));
2127    GURL expected_url(GURL(tests[i].expected_simplified_url));
2128    EXPECT_EQ(expected_url, SimplifyUrlForRequest(input_url));
2129  }
2130}
2131
2132TEST(NetUtilTest, SetExplicitlyAllowedPortsTest) {
2133  std::string invalid[] = { "1,2,a", "'1','2'", "1, 2, 3", "1 0,11,12" };
2134  std::string valid[] = { "", "1", "1,2", "1,2,3", "10,11,12,13" };
2135
2136  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(invalid); ++i) {
2137    SetExplicitlyAllowedPorts(invalid[i]);
2138    EXPECT_EQ(0, static_cast<int>(explicitly_allowed_ports.size()));
2139  }
2140
2141  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(valid); ++i) {
2142    SetExplicitlyAllowedPorts(valid[i]);
2143    EXPECT_EQ(i, explicitly_allowed_ports.size());
2144  }
2145}
2146
2147TEST(NetUtilTest, GetHostOrSpecFromURL) {
2148  EXPECT_EQ("example.com",
2149            GetHostOrSpecFromURL(GURL("http://example.com/test")));
2150  EXPECT_EQ("example.com",
2151            GetHostOrSpecFromURL(GURL("http://example.com./test")));
2152  EXPECT_EQ("file:///tmp/test.html",
2153            GetHostOrSpecFromURL(GURL("file:///tmp/test.html")));
2154}
2155
2156// Test that invalid IP literals fail to parse.
2157TEST(NetUtilTest, ParseIPLiteralToNumber_FailParse) {
2158  IPAddressNumber number;
2159
2160  EXPECT_FALSE(ParseIPLiteralToNumber("bad value", &number));
2161  EXPECT_FALSE(ParseIPLiteralToNumber("bad:value", &number));
2162  EXPECT_FALSE(ParseIPLiteralToNumber("", &number));
2163  EXPECT_FALSE(ParseIPLiteralToNumber("192.168.0.1:30", &number));
2164  EXPECT_FALSE(ParseIPLiteralToNumber("  192.168.0.1  ", &number));
2165  EXPECT_FALSE(ParseIPLiteralToNumber("[::1]", &number));
2166}
2167
2168// Test parsing an IPv4 literal.
2169TEST(NetUtilTest, ParseIPLiteralToNumber_IPv4) {
2170  IPAddressNumber number;
2171  EXPECT_TRUE(ParseIPLiteralToNumber("192.168.0.1", &number));
2172  EXPECT_EQ("192,168,0,1", DumpIPNumber(number));
2173}
2174
2175// Test parsing an IPv6 literal.
2176TEST(NetUtilTest, ParseIPLiteralToNumber_IPv6) {
2177  IPAddressNumber number;
2178  EXPECT_TRUE(ParseIPLiteralToNumber("1:abcd::3:4:ff", &number));
2179  EXPECT_EQ("0,1,171,205,0,0,0,0,0,0,0,3,0,4,0,255", DumpIPNumber(number));
2180}
2181
2182// Test mapping an IPv4 address to an IPv6 address.
2183TEST(NetUtilTest, ConvertIPv4NumberToIPv6Number) {
2184  IPAddressNumber ipv4_number;
2185  EXPECT_TRUE(ParseIPLiteralToNumber("192.168.0.1", &ipv4_number));
2186
2187  IPAddressNumber ipv6_number =
2188      ConvertIPv4NumberToIPv6Number(ipv4_number);
2189
2190  // ::ffff:192.168.1.1
2191  EXPECT_EQ("0,0,0,0,0,0,0,0,0,0,255,255,192,168,0,1",
2192            DumpIPNumber(ipv6_number));
2193}
2194
2195// Test parsing invalid CIDR notation literals.
2196TEST(NetUtilTest, ParseCIDRBlock_Invalid) {
2197  const char* bad_literals[] = {
2198      "foobar",
2199      "",
2200      "192.168.0.1",
2201      "::1",
2202      "/",
2203      "/1",
2204      "1",
2205      "192.168.1.1/-1",
2206      "192.168.1.1/33",
2207      "::1/-3",
2208      "a::3/129",
2209      "::1/x",
2210      "192.168.0.1//11"
2211  };
2212
2213  for (size_t i = 0; i < arraysize(bad_literals); ++i) {
2214    IPAddressNumber ip_number;
2215    size_t prefix_length_in_bits;
2216
2217    EXPECT_FALSE(ParseCIDRBlock(bad_literals[i],
2218                                     &ip_number,
2219                                     &prefix_length_in_bits));
2220  }
2221}
2222
2223// Test parsing a valid CIDR notation literal.
2224TEST(NetUtilTest, ParseCIDRBlock_Valid) {
2225  IPAddressNumber ip_number;
2226  size_t prefix_length_in_bits;
2227
2228  EXPECT_TRUE(ParseCIDRBlock("192.168.0.1/11",
2229                                  &ip_number,
2230                                  &prefix_length_in_bits));
2231
2232  EXPECT_EQ("192,168,0,1", DumpIPNumber(ip_number));
2233  EXPECT_EQ(11u, prefix_length_in_bits);
2234}
2235
2236TEST(NetUtilTest, IPNumberMatchesPrefix) {
2237  struct {
2238    const char* cidr_literal;
2239    const char* ip_literal;
2240    bool expected_to_match;
2241  } tests[] = {
2242    // IPv4 prefix with IPv4 inputs.
2243    {
2244      "10.10.1.32/27",
2245      "10.10.1.44",
2246      true
2247    },
2248    {
2249      "10.10.1.32/27",
2250      "10.10.1.90",
2251      false
2252    },
2253    {
2254      "10.10.1.32/27",
2255      "10.10.1.90",
2256      false
2257    },
2258
2259    // IPv6 prefix with IPv6 inputs.
2260    {
2261      "2001:db8::/32",
2262      "2001:DB8:3:4::5",
2263      true
2264    },
2265    {
2266      "2001:db8::/32",
2267      "2001:c8::",
2268      false
2269    },
2270
2271    // IPv6 prefix with IPv4 inputs.
2272    {
2273      "2001:db8::/33",
2274      "192.168.0.1",
2275      false
2276    },
2277    {
2278      "::ffff:192.168.0.1/112",
2279      "192.168.33.77",
2280      true
2281    },
2282
2283    // IPv4 prefix with IPv6 inputs.
2284    {
2285      "10.11.33.44/16",
2286      "::ffff:0a0b:89",
2287      true
2288    },
2289    {
2290      "10.11.33.44/16",
2291      "::ffff:10.12.33.44",
2292      false
2293    },
2294  };
2295  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(tests); ++i) {
2296    SCOPED_TRACE(base::StringPrintf("Test[%" PRIuS "]: %s, %s", i,
2297                                    tests[i].cidr_literal,
2298                                    tests[i].ip_literal));
2299
2300    IPAddressNumber ip_number;
2301    EXPECT_TRUE(ParseIPLiteralToNumber(tests[i].ip_literal, &ip_number));
2302
2303    IPAddressNumber ip_prefix;
2304    size_t prefix_length_in_bits;
2305
2306    EXPECT_TRUE(ParseCIDRBlock(tests[i].cidr_literal,
2307                               &ip_prefix,
2308                               &prefix_length_in_bits));
2309
2310    EXPECT_EQ(tests[i].expected_to_match,
2311              IPNumberMatchesPrefix(ip_number,
2312                                    ip_prefix,
2313                                    prefix_length_in_bits));
2314  }
2315}
2316
2317TEST(NetUtilTest, IsLocalhost) {
2318  EXPECT_TRUE(net::IsLocalhost("localhost"));
2319  EXPECT_TRUE(net::IsLocalhost("localhost.localdomain"));
2320  EXPECT_TRUE(net::IsLocalhost("localhost6"));
2321  EXPECT_TRUE(net::IsLocalhost("localhost6.localdomain6"));
2322  EXPECT_TRUE(net::IsLocalhost("127.0.0.1"));
2323  EXPECT_TRUE(net::IsLocalhost("127.0.1.0"));
2324  EXPECT_TRUE(net::IsLocalhost("127.1.0.0"));
2325  EXPECT_TRUE(net::IsLocalhost("127.0.0.255"));
2326  EXPECT_TRUE(net::IsLocalhost("127.0.255.0"));
2327  EXPECT_TRUE(net::IsLocalhost("127.255.0.0"));
2328  EXPECT_TRUE(net::IsLocalhost("::1"));
2329  EXPECT_TRUE(net::IsLocalhost("0:0:0:0:0:0:0:1"));
2330
2331  EXPECT_FALSE(net::IsLocalhost("localhostx"));
2332  EXPECT_FALSE(net::IsLocalhost("foo.localdomain"));
2333  EXPECT_FALSE(net::IsLocalhost("localhost6x"));
2334  EXPECT_FALSE(net::IsLocalhost("localhost.localdomain6"));
2335  EXPECT_FALSE(net::IsLocalhost("localhost6.localdomain"));
2336  EXPECT_FALSE(net::IsLocalhost("127.0.0.1.1"));
2337  EXPECT_FALSE(net::IsLocalhost(".127.0.0.255"));
2338  EXPECT_FALSE(net::IsLocalhost("::2"));
2339  EXPECT_FALSE(net::IsLocalhost("::1:1"));
2340  EXPECT_FALSE(net::IsLocalhost("0:0:0:0:1:0:0:1"));
2341  EXPECT_FALSE(net::IsLocalhost("::1:1"));
2342  EXPECT_FALSE(net::IsLocalhost("0:0:0:0:0:0:0:0:1"));
2343}
2344
2345// Verify GetNetworkList().
2346TEST(NetUtilTest, GetNetworkList) {
2347  NetworkInterfaceList list;
2348  ASSERT_TRUE(GetNetworkList(&list));
2349
2350  for (NetworkInterfaceList::iterator it = list.begin();
2351       it != list.end(); ++it) {
2352    // Verify that the name is not empty.
2353    EXPECT_FALSE(it->name.empty());
2354
2355    // Verify that the address is correct.
2356    EXPECT_TRUE(it->address.size() == kIPv4AddressSize ||
2357                it->address.size() == kIPv6AddressSize)
2358        << "Invalid address of size " << it->address.size();
2359    bool all_zeroes = true;
2360    for (size_t i = 0; i < it->address.size(); ++i) {
2361      if (it->address[i] != 0) {
2362        all_zeroes = false;
2363        break;
2364      }
2365    }
2366    EXPECT_FALSE(all_zeroes);
2367  }
2368}
2369
2370TEST(NetUtilTest, AdjustComponentOffset) {
2371  std::vector<size_t> old_offsets;
2372  for (size_t i = 0; i < 10; ++i)
2373    old_offsets.push_back(i);
2374  std::vector<size_t> new_offsets;
2375  std::transform(old_offsets.begin(),
2376                 old_offsets.end(),
2377                 std::back_inserter(new_offsets),
2378                 ClampComponentOffset(5));
2379  size_t expected_1[] = {kNpos, kNpos, kNpos, kNpos, kNpos, 5, 6, 7, 8, 9};
2380  EXPECT_EQ(new_offsets.size(), arraysize(expected_1));
2381  EXPECT_EQ(new_offsets.size(), old_offsets.size());
2382  for (size_t i = 0; i < arraysize(expected_1); ++i)
2383    EXPECT_EQ(expected_1[i], new_offsets[i]);
2384}
2385
2386}  // namespace net
2387