// host.js revision 430b6b672341c7e8b5e4cfafaaae20315e68701b
// Host-name canonicalization test.
//
// description() and shouldBe() are not defined in this file, and neither is
// the canonicalize() function named inside the generated expressions; they
// are presumably supplied by the surrounding test harness -- confirm there.
description("Canonicalization of host names.");

// Each entry is a pair: [host as written in the URL, host expected after
// canonicalization]. The driver loop below wraps both in "http://" ... "/".
var cases = [
  // Basic canonicalization, uppercase should be converted to lowercase.
  ["GoOgLe.CoM", "google.com"],
  // Spaces and some other characters should be escaped.
  ["Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com"],
  // Exciting different types of spaces!
  ["GOO\u00a0\u3000goo.com", "goo%20%20goo.com"],
  // Other types of space (no-break, zero-width, zero-width-no-break) are
  // name-prepped away to nothing.
  ["GOO\u200b\u2060\ufeffgoo.com", "googoo.com"],
  // Ideographic full stop (full-width period for Chinese, etc.) should be
  // treated as a dot.
  ["www.foo\u3002" + "bar.com", "www.foo.bar.com"],
  // Invalid unicode characters should fail...
  // ...In wide input, ICU will barf and we'll end up with the input as
  //    escaped UTF-8 (the invalid character should be replaced with the
  //    replacement character).
  ["\ufdd0zyx.com", "%EF%BF%BDzyx.com"],
  // ...This is the same as the previous case but with the input escaped.
  ["%ef%b7%90zyx.com", "%EF%BF%BDzyx.com"],
  // Test name prepping, fullwidth input should be converted to ASCII and NOT
  // IDN-ized. This is "Go" in fullwidth UTF-8/UTF-16.
  ["\uff27\uff4f.com", "go.com"],
  // Test that fullwidth escaped values are properly name-prepped,
  // then converted or rejected.
  // ...%41 in fullwidth = 'A' (also as escaped UTF-8 input)
  ["\uff05\uff14\uff11.com", "a.com"],
  ["%ef%bc%85%ef%bc%94%ef%bc%91.com", "a.com"],
  // ...%00 in fullwidth should fail (also as escaped UTF-8 input)
  ["\uff05\uff10\uff10.com", "%00.com"],
  ["%ef%bc%85%ef%bc%90%ef%bc%90.com", "%00.com"],
  // Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN
  ["\u4f60\u597d\u4f60\u597d", "xn--6qqa088eba"],
  // Mixed UTF-8 and escaped UTF-8 (narrow case) and UTF-16 and escaped
  // UTF-8 (wide case). The output should be equivalent to the true wide
  // character input above.
  ["%E4%BD%A0%E5%A5%BD\u4f60\u597d", "xn--6qqa088eba"],
  // Invalid escaped characters should fail and the percents should be
  // escaped.
  ["%zz%66%a", "%25zzf%25a"],
  // If we get an invalid character that has been escaped.
  ["%25", "%25"],
  ["hello%00", "hello%00"],
  // Escaped numbers should be treated like IP addresses if they are.
  ["%30%78%63%30%2e%30%32%35%30.01", "192.168.0.1"],
  ["%30%78%63%30%2e%30%32%35%30.01%2e", "192.168.0.1"],
  // Invalid escaping should trigger the regular host error handling.
  ["%3g%78%63%30%2e%30%32%35%30%2E.01", "%253gxc0.0250..01"],
  // Something that isn't exactly an IP should get treated as a host and
  // spaces escaped.
  ["192.168.0.1 hello", "192.168.0.1%20hello"],
  // Fullwidth and escaped UTF-8 fullwidth should still be treated as IP.
  // These are "0Xc0.0250.01" in fullwidth.
  ["\uff10\uff38\uff43\uff10\uff0e\uff10\uff12\uff15\uff10\uff0e\uff10\uff11", "192.168.0.1"],
  // Broken IP addresses get marked as such.
  ["192.168.0.257", "192.168.0.257"],
  ["[google.com]", "[google.com]"],
  // Cyrillic letter followed by ( should return punycode for ( escaped before
  // the punycode string was created. I.e. if ( is escaped after punycode is
  // created we would get xn--%28-8tb (incorrect).
  ["\u0442(", "xn--%28-7ed"]
];

for (var i = 0; i < cases.length; ++i) {
  // Declared with var so the script does not rely on implicit globals, which
  // throw a ReferenceError in strict mode.
  var test_vector = cases[i][0];
  var expected_result = cases[i][1];
  // Both arguments are expression strings built by plain concatenation (note
  // the embedded single quotes), so a vector containing ' or \ would need
  // escaping first. Presumably shouldBe() evaluates them -- confirm in the
  // harness. The exact text is kept unchanged because it may appear verbatim
  // in recorded expected output.
  shouldBe("canonicalize('http://" + test_vector + "/')",
           "'http://" + expected_result + "/'");
}

// Completion flag; presumably read by the harness after the script runs.
var successfullyParsed = true;