1import unittest
2import urllib.parse
3
# Base URLs against which the relative references in the urljoin() tests
# are resolved.  One constant is kept per specification so that each test
# names the document its examples come from, even where the values match.
RFC1808_BASE = 'http://a/b/c/d;p?q#f'
RFC2396_BASE = 'http://a/b/c/d;p?q'
RFC3986_BASE = 'http://a/b/c/d;p?q'
# A base without params, query or fragment.
SIMPLE_BASE = 'http://a/b/c/d'
8
# Test cases for parse_qsl().  Each case is a two-tuple holding the query
# string and the expected result: a list of (name, value) pairs in order of
# appearance.  Only the str forms are spelled out; the bytes forms are
# derived from them by ASCII-encoding every query, name and value.
#
# NOTE(review): the ';' cases expect ';' to split pairs by default.  Newer
# urllib.parse releases split only on '&' unless a separator is passed --
# confirm against the interpreter this suite targets.

_QSL_AMPERSAND_CASES = [
    ("", []),
    ("&", []),
    ("&&", []),
    ("=", [('', '')]),
    ("=a", [('', 'a')]),
    ("a", [('a', '')]),
    ("a=", [('a', '')]),
    ("&a=b", [('a', 'b')]),
    ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1&a=2", [('a', '1'), ('a', '2')]),
]

_QSL_SEMICOLON_CASES = [
    (";", []),
    (";;", []),
    (";a=b", [('a', 'b')]),
    ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
    ("a=1;a=2", [('a', '1'), ('a', '2')]),
]


def _qsl_as_bytes(cases):
    """Return *cases* with every query, name and value ASCII-encoded."""
    return [(query.encode('ascii'),
             [(name.encode('ascii'), value.encode('ascii'))
              for name, value in pairs])
            for query, pairs in cases]


# Order: '&' str cases, '&' bytes cases, ';' str cases, ';' bytes cases.
parse_qsl_test_cases = (
    _QSL_AMPERSAND_CASES + _qsl_as_bytes(_QSL_AMPERSAND_CASES)
    + _QSL_SEMICOLON_CASES + _qsl_as_bytes(_QSL_SEMICOLON_CASES)
)
44
# Test cases for parse_qs().  Each case is a two-tuple holding the query
# string and the expected result: a dictionary mapping each name to the list
# of its values.  Only the str forms are spelled out; the bytes forms are
# derived from them by ASCII-encoding every query, name and value.
#
# NOTE(review): the ';' cases expect ';' to split pairs by default.  Newer
# urllib.parse releases split only on '&' unless a separator is passed --
# confirm against the interpreter this suite targets.

_QS_AMPERSAND_CASES = [
    ("", {}),
    ("&", {}),
    ("&&", {}),
    ("=", {'': ['']}),
    ("=a", {'': ['a']}),
    ("a", {'a': ['']}),
    ("a=", {'a': ['']}),
    ("&a=b", {'a': ['b']}),
    ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
    ("a=1&a=2", {'a': ['1', '2']}),
]

_QS_SEMICOLON_CASES = [
    (";", {}),
    (";;", {}),
    (";a=b", {'a': ['b']}),
    ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
    ("a=1;a=2", {'a': ['1', '2']}),
]


def _qs_as_bytes(cases):
    """Return *cases* with every query, name and value ASCII-encoded."""
    return [(query.encode('ascii'),
             {name.encode('ascii'): [value.encode('ascii') for value in values]
              for name, values in mapping.items()})
            for query, mapping in cases]


# Order: '&' str cases, '&' bytes cases, ';' str cases, ';' bytes cases.
parse_qs_test_cases = (
    _QS_AMPERSAND_CASES + _qs_as_bytes(_QS_AMPERSAND_CASES)
    + _QS_SEMICOLON_CASES + _qs_as_bytes(_QS_SEMICOLON_CASES)
)
77
78class UrlParseTestCase(unittest.TestCase):
79
80    def checkRoundtrips(self, url, parsed, split):
81        result = urllib.parse.urlparse(url)
82        self.assertEqual(result, parsed)
83        t = (result.scheme, result.netloc, result.path,
84             result.params, result.query, result.fragment)
85        self.assertEqual(t, parsed)
86        # put it back together and it should be the same
87        result2 = urllib.parse.urlunparse(result)
88        self.assertEqual(result2, url)
89        self.assertEqual(result2, result.geturl())
90
91        # the result of geturl() is a fixpoint; we can always parse it
92        # again to get the same result:
93        result3 = urllib.parse.urlparse(result.geturl())
94        self.assertEqual(result3.geturl(), result.geturl())
95        self.assertEqual(result3,          result)
96        self.assertEqual(result3.scheme,   result.scheme)
97        self.assertEqual(result3.netloc,   result.netloc)
98        self.assertEqual(result3.path,     result.path)
99        self.assertEqual(result3.params,   result.params)
100        self.assertEqual(result3.query,    result.query)
101        self.assertEqual(result3.fragment, result.fragment)
102        self.assertEqual(result3.username, result.username)
103        self.assertEqual(result3.password, result.password)
104        self.assertEqual(result3.hostname, result.hostname)
105        self.assertEqual(result3.port,     result.port)
106
107        # check the roundtrip using urlsplit() as well
108        result = urllib.parse.urlsplit(url)
109        self.assertEqual(result, split)
110        t = (result.scheme, result.netloc, result.path,
111             result.query, result.fragment)
112        self.assertEqual(t, split)
113        result2 = urllib.parse.urlunsplit(result)
114        self.assertEqual(result2, url)
115        self.assertEqual(result2, result.geturl())
116
117        # check the fixpoint property of re-parsing the result of geturl()
118        result3 = urllib.parse.urlsplit(result.geturl())
119        self.assertEqual(result3.geturl(), result.geturl())
120        self.assertEqual(result3,          result)
121        self.assertEqual(result3.scheme,   result.scheme)
122        self.assertEqual(result3.netloc,   result.netloc)
123        self.assertEqual(result3.path,     result.path)
124        self.assertEqual(result3.query,    result.query)
125        self.assertEqual(result3.fragment, result.fragment)
126        self.assertEqual(result3.username, result.username)
127        self.assertEqual(result3.password, result.password)
128        self.assertEqual(result3.hostname, result.hostname)
129        self.assertEqual(result3.port,     result.port)
130
131    def test_qsl(self):
132        for orig, expect in parse_qsl_test_cases:
133            result = urllib.parse.parse_qsl(orig, keep_blank_values=True)
134            self.assertEqual(result, expect, "Error parsing %r" % orig)
135            expect_without_blanks = [v for v in expect if len(v[1])]
136            result = urllib.parse.parse_qsl(orig, keep_blank_values=False)
137            self.assertEqual(result, expect_without_blanks,
138                            "Error parsing %r" % orig)
139
140    def test_qs(self):
141        for orig, expect in parse_qs_test_cases:
142            result = urllib.parse.parse_qs(orig, keep_blank_values=True)
143            self.assertEqual(result, expect, "Error parsing %r" % orig)
144            expect_without_blanks = {v: expect[v]
145                                     for v in expect if len(expect[v][0])}
146            result = urllib.parse.parse_qs(orig, keep_blank_values=False)
147            self.assertEqual(result, expect_without_blanks,
148                            "Error parsing %r" % orig)
149
150    def test_roundtrips(self):
151        str_cases = [
152            ('file:///tmp/junk.txt',
153             ('file', '', '/tmp/junk.txt', '', '', ''),
154             ('file', '', '/tmp/junk.txt', '', '')),
155            ('imap://mail.python.org/mbox1',
156             ('imap', 'mail.python.org', '/mbox1', '', '', ''),
157             ('imap', 'mail.python.org', '/mbox1', '', '')),
158            ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
159             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
160              '', '', ''),
161             ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
162              '', '')),
163            ('nfs://server/path/to/file.txt',
164             ('nfs', 'server', '/path/to/file.txt', '', '', ''),
165             ('nfs', 'server', '/path/to/file.txt', '', '')),
166            ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
167             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
168              '', '', ''),
169             ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
170              '', '')),
171            ('git+ssh://git@github.com/user/project.git',
172            ('git+ssh', 'git@github.com','/user/project.git',
173             '','',''),
174            ('git+ssh', 'git@github.com','/user/project.git',
175             '', '')),
176            ]
177        def _encode(t):
178            return (t[0].encode('ascii'),
179                    tuple(x.encode('ascii') for x in t[1]),
180                    tuple(x.encode('ascii') for x in t[2]))
181        bytes_cases = [_encode(x) for x in str_cases]
182        for url, parsed, split in str_cases + bytes_cases:
183            self.checkRoundtrips(url, parsed, split)
184
185    def test_http_roundtrips(self):
186        # urllib.parse.urlsplit treats 'http:' as an optimized special case,
187        # so we test both 'http:' and 'https:' in all the following.
188        # Three cheers for white box knowledge!
189        str_cases = [
190            ('://www.python.org',
191             ('www.python.org', '', '', '', ''),
192             ('www.python.org', '', '', '')),
193            ('://www.python.org#abc',
194             ('www.python.org', '', '', '', 'abc'),
195             ('www.python.org', '', '', 'abc')),
196            ('://www.python.org?q=abc',
197             ('www.python.org', '', '', 'q=abc', ''),
198             ('www.python.org', '', 'q=abc', '')),
199            ('://www.python.org/#abc',
200             ('www.python.org', '/', '', '', 'abc'),
201             ('www.python.org', '/', '', 'abc')),
202            ('://a/b/c/d;p?q#f',
203             ('a', '/b/c/d', 'p', 'q', 'f'),
204             ('a', '/b/c/d;p', 'q', 'f')),
205            ]
206        def _encode(t):
207            return (t[0].encode('ascii'),
208                    tuple(x.encode('ascii') for x in t[1]),
209                    tuple(x.encode('ascii') for x in t[2]))
210        bytes_cases = [_encode(x) for x in str_cases]
211        str_schemes = ('http', 'https')
212        bytes_schemes = (b'http', b'https')
213        str_tests = str_schemes, str_cases
214        bytes_tests = bytes_schemes, bytes_cases
215        for schemes, test_cases in (str_tests, bytes_tests):
216            for scheme in schemes:
217                for url, parsed, split in test_cases:
218                    url = scheme + url
219                    parsed = (scheme,) + parsed
220                    split = (scheme,) + split
221                    self.checkRoundtrips(url, parsed, split)
222
223    def checkJoin(self, base, relurl, expected):
224        str_components = (base, relurl, expected)
225        self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
226        bytes_components = baseb, relurlb, expectedb = [
227                            x.encode('ascii') for x in str_components]
228        self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
229
230    def test_unparse_parse(self):
231        str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]
232        bytes_cases = [x.encode('ascii') for x in str_cases]
233        for u in str_cases + bytes_cases:
234            self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u)
235            self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u)
236
237    def test_RFC1808(self):
238        # "normal" cases from RFC 1808:
239        self.checkJoin(RFC1808_BASE, 'g:h', 'g:h')
240        self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g')
241        self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g')
242        self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/')
243        self.checkJoin(RFC1808_BASE, '/g', 'http://a/g')
244        self.checkJoin(RFC1808_BASE, '//g', 'http://g')
245        self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y')
246        self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
247        self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s')
248        self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s')
249        self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
250        self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
251        self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x')
252        self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
253        self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/')
254        self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/')
255        self.checkJoin(RFC1808_BASE, '..', 'http://a/b/')
256        self.checkJoin(RFC1808_BASE, '../', 'http://a/b/')
257        self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g')
258        self.checkJoin(RFC1808_BASE, '../..', 'http://a/')
259        self.checkJoin(RFC1808_BASE, '../../', 'http://a/')
260        self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')
261
262        # "abnormal" cases from RFC 1808:
263        self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
264        self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
265        self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
266        self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
267        self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g')
268        self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g')
269        self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/')
270        self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h')
271        self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h')
272
273        # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808),
274        # so we'll not actually run these tests (which expect 1808 behavior).
275        #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
276        #self.checkJoin(RFC1808_BASE, 'http:', 'http:')
277
278        # XXX: The following tests are no longer compatible with RFC3986
279        # self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g')
280        # self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g')
281        # self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g')
282        # self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g')
283
284
285    def test_RFC2368(self):
286        # Issue 11467: path that starts with a number is not parsed correctly
287        self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
288                ('mailto', '', '1337@example.org', '', '', ''))
289
290    def test_RFC2396(self):
291        # cases from RFC 2396
292
293
294        self.checkJoin(RFC2396_BASE, 'g:h', 'g:h')
295        self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g')
296        self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g')
297        self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/')
298        self.checkJoin(RFC2396_BASE, '/g', 'http://a/g')
299        self.checkJoin(RFC2396_BASE, '//g', 'http://g')
300        self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y')
301        self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s')
302        self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s')
303        self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s')
304        self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x')
305        self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s')
306        self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/')
307        self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/')
308        self.checkJoin(RFC2396_BASE, '..', 'http://a/b/')
309        self.checkJoin(RFC2396_BASE, '../', 'http://a/b/')
310        self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g')
311        self.checkJoin(RFC2396_BASE, '../..', 'http://a/')
312        self.checkJoin(RFC2396_BASE, '../../', 'http://a/')
313        self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g')
314        self.checkJoin(RFC2396_BASE, '', RFC2396_BASE)
315        self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.')
316        self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g')
317        self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..')
318        self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g')
319        self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g')
320        self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/')
321        self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h')
322        self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h')
323        self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y')
324        self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y')
325        self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x')
326        self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x')
327        self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x')
328        self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x')
329
330        # XXX: The following tests are no longer compatible with RFC3986
331        # self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g')
332        # self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g')
333        # self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g')
334        # self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g')
335
336
337    def test_RFC3986(self):
338        # Test cases from RFC3986
339        self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
340        self.checkJoin(RFC3986_BASE, ';x', 'http://a/b/c/;x')
341        self.checkJoin(RFC3986_BASE, 'g:h','g:h')
342        self.checkJoin(RFC3986_BASE, 'g','http://a/b/c/g')
343        self.checkJoin(RFC3986_BASE, './g','http://a/b/c/g')
344        self.checkJoin(RFC3986_BASE, 'g/','http://a/b/c/g/')
345        self.checkJoin(RFC3986_BASE, '/g','http://a/g')
346        self.checkJoin(RFC3986_BASE, '//g','http://g')
347        self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y')
348        self.checkJoin(RFC3986_BASE, 'g?y','http://a/b/c/g?y')
349        self.checkJoin(RFC3986_BASE, '#s','http://a/b/c/d;p?q#s')
350        self.checkJoin(RFC3986_BASE, 'g#s','http://a/b/c/g#s')
351        self.checkJoin(RFC3986_BASE, 'g?y#s','http://a/b/c/g?y#s')
352        self.checkJoin(RFC3986_BASE, ';x','http://a/b/c/;x')
353        self.checkJoin(RFC3986_BASE, 'g;x','http://a/b/c/g;x')
354        self.checkJoin(RFC3986_BASE, 'g;x?y#s','http://a/b/c/g;x?y#s')
355        self.checkJoin(RFC3986_BASE, '','http://a/b/c/d;p?q')
356        self.checkJoin(RFC3986_BASE, '.','http://a/b/c/')
357        self.checkJoin(RFC3986_BASE, './','http://a/b/c/')
358        self.checkJoin(RFC3986_BASE, '..','http://a/b/')
359        self.checkJoin(RFC3986_BASE, '../','http://a/b/')
360        self.checkJoin(RFC3986_BASE, '../g','http://a/b/g')
361        self.checkJoin(RFC3986_BASE, '../..','http://a/')
362        self.checkJoin(RFC3986_BASE, '../../','http://a/')
363        self.checkJoin(RFC3986_BASE, '../../g','http://a/g')
364        self.checkJoin(RFC3986_BASE, '../../../g', 'http://a/g')
365
366        #Abnormal Examples
367
368        # The 'abnormal scenarios' are incompatible with RFC2986 parsing
369        # Tests are here for reference.
370
371        self.checkJoin(RFC3986_BASE, '../../../g','http://a/g')
372        self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g')
373        self.checkJoin(RFC3986_BASE, '/./g','http://a/g')
374        self.checkJoin(RFC3986_BASE, '/../g','http://a/g')
375        self.checkJoin(RFC3986_BASE, 'g.','http://a/b/c/g.')
376        self.checkJoin(RFC3986_BASE, '.g','http://a/b/c/.g')
377        self.checkJoin(RFC3986_BASE, 'g..','http://a/b/c/g..')
378        self.checkJoin(RFC3986_BASE, '..g','http://a/b/c/..g')
379        self.checkJoin(RFC3986_BASE, './../g','http://a/b/g')
380        self.checkJoin(RFC3986_BASE, './g/.','http://a/b/c/g/')
381        self.checkJoin(RFC3986_BASE, 'g/./h','http://a/b/c/g/h')
382        self.checkJoin(RFC3986_BASE, 'g/../h','http://a/b/c/h')
383        self.checkJoin(RFC3986_BASE, 'g;x=1/./y','http://a/b/c/g;x=1/y')
384        self.checkJoin(RFC3986_BASE, 'g;x=1/../y','http://a/b/c/y')
385        self.checkJoin(RFC3986_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
386        self.checkJoin(RFC3986_BASE, 'g?y/../x','http://a/b/c/g?y/../x')
387        self.checkJoin(RFC3986_BASE, 'g#s/./x','http://a/b/c/g#s/./x')
388        self.checkJoin(RFC3986_BASE, 'g#s/../x','http://a/b/c/g#s/../x')
389        #self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser
390        self.checkJoin(RFC3986_BASE, 'http:g','http://a/b/c/g') #relaxed parser
391
392        # Test for issue9721
393        self.checkJoin('http://a/b/c/de', ';x','http://a/b/c/;x')
394
395    def test_urljoins(self):
396        self.checkJoin(SIMPLE_BASE, 'g:h','g:h')
397        self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
398        self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
399        self.checkJoin(SIMPLE_BASE, 'g','http://a/b/c/g')
400        self.checkJoin(SIMPLE_BASE, './g','http://a/b/c/g')
401        self.checkJoin(SIMPLE_BASE, 'g/','http://a/b/c/g/')
402        self.checkJoin(SIMPLE_BASE, '/g','http://a/g')
403        self.checkJoin(SIMPLE_BASE, '//g','http://g')
404        self.checkJoin(SIMPLE_BASE, '?y','http://a/b/c/d?y')
405        self.checkJoin(SIMPLE_BASE, 'g?y','http://a/b/c/g?y')
406        self.checkJoin(SIMPLE_BASE, 'g?y/./x','http://a/b/c/g?y/./x')
407        self.checkJoin(SIMPLE_BASE, '.','http://a/b/c/')
408        self.checkJoin(SIMPLE_BASE, './','http://a/b/c/')
409        self.checkJoin(SIMPLE_BASE, '..','http://a/b/')
410        self.checkJoin(SIMPLE_BASE, '../','http://a/b/')
411        self.checkJoin(SIMPLE_BASE, '../g','http://a/b/g')
412        self.checkJoin(SIMPLE_BASE, '../..','http://a/')
413        self.checkJoin(SIMPLE_BASE, '../../g','http://a/g')
414        self.checkJoin(SIMPLE_BASE, './../g','http://a/b/g')
415        self.checkJoin(SIMPLE_BASE, './g/.','http://a/b/c/g/')
416        self.checkJoin(SIMPLE_BASE, 'g/./h','http://a/b/c/g/h')
417        self.checkJoin(SIMPLE_BASE, 'g/../h','http://a/b/c/h')
418        self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g')
419        self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d')
420        self.checkJoin(SIMPLE_BASE, 'http:?y','http://a/b/c/d?y')
421        self.checkJoin(SIMPLE_BASE, 'http:g?y','http://a/b/c/g?y')
422        self.checkJoin(SIMPLE_BASE, 'http:g?y/./x','http://a/b/c/g?y/./x')
423        self.checkJoin('http:///', '..','http:///')
424        self.checkJoin('', 'http://a/b/c/g?y/./x','http://a/b/c/g?y/./x')
425        self.checkJoin('', 'http://a/./g', 'http://a/./g')
426        self.checkJoin('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2')
427        self.checkJoin('svn+ssh://pathtorepo/dir1', 'dir2', 'svn+ssh://pathtorepo/dir2')
428        self.checkJoin('ws://a/b','g','ws://a/g')
429        self.checkJoin('wss://a/b','g','wss://a/g')
430
431        # XXX: The following tests are no longer compatible with RFC3986
432        # self.checkJoin(SIMPLE_BASE, '../../../g','http://a/../g')
433        # self.checkJoin(SIMPLE_BASE, '/./g','http://a/./g')
434
435        # test for issue22118 duplicate slashes
436        self.checkJoin(SIMPLE_BASE + '/', 'foo', SIMPLE_BASE + '/foo')
437
438        # Non-RFC-defined tests, covering variations of base and trailing
439        # slashes
440        self.checkJoin('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/')
441        self.checkJoin('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/')
442        self.checkJoin('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/')
443        self.checkJoin('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/')
444        self.checkJoin('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g')
445        self.checkJoin('http://a/b/', '../../f/g/', 'http://a/f/g/')
446
447        # issue 23703: don't duplicate filename
448        self.checkJoin('a', 'b', 'b')
449
450    def test_RFC2732(self):
451        str_cases = [
452            ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
453            ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432),
454            ('http://[::1]:5432/foo/', '::1', 5432),
455            ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432),
456            ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432),
457            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/',
458             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432),
459            ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432),
460            ('http://[::ffff:12.34.56.78]:5432/foo/',
461             '::ffff:12.34.56.78', 5432),
462            ('http://Test.python.org/foo/', 'test.python.org', None),
463            ('http://12.34.56.78/foo/', '12.34.56.78', None),
464            ('http://[::1]/foo/', '::1', None),
465            ('http://[dead:beef::1]/foo/', 'dead:beef::1', None),
466            ('http://[dead:beef::]/foo/', 'dead:beef::', None),
467            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/',
468             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
469            ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None),
470            ('http://[::ffff:12.34.56.78]/foo/',
471             '::ffff:12.34.56.78', None),
472            ('http://Test.python.org:/foo/', 'test.python.org', None),
473            ('http://12.34.56.78:/foo/', '12.34.56.78', None),
474            ('http://[::1]:/foo/', '::1', None),
475            ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None),
476            ('http://[dead:beef::]:/foo/', 'dead:beef::', None),
477            ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/',
478             'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None),
479            ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None),
480            ('http://[::ffff:12.34.56.78]:/foo/',
481             '::ffff:12.34.56.78', None),
482            ]
483        def _encode(t):
484            return t[0].encode('ascii'), t[1].encode('ascii'), t[2]
485        bytes_cases = [_encode(x) for x in str_cases]
486        for url, hostname, port in str_cases + bytes_cases:
487            urlparsed = urllib.parse.urlparse(url)
488            self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port))
489
490        str_cases = [
491                'http://::12.34.56.78]/',
492                'http://[::1/foo/',
493                'ftp://[::1/foo/bad]/bad',
494                'http://[::1/foo/bad]/bad',
495                'http://[::ffff:12.34.56.78']
496        bytes_cases = [x.encode('ascii') for x in str_cases]
497        for invalid_url in str_cases + bytes_cases:
498            self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url)
499
500    def test_urldefrag(self):
501        str_cases = [
502            ('http://python.org#frag', 'http://python.org', 'frag'),
503            ('http://python.org', 'http://python.org', ''),
504            ('http://python.org/#frag', 'http://python.org/', 'frag'),
505            ('http://python.org/', 'http://python.org/', ''),
506            ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
507            ('http://python.org/?q', 'http://python.org/?q', ''),
508            ('http://python.org/p#frag', 'http://python.org/p', 'frag'),
509            ('http://python.org/p?q', 'http://python.org/p?q', ''),
510            (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
511            (RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
512        ]
513        def _encode(t):
514            return type(t)(x.encode('ascii') for x in t)
515        bytes_cases = [_encode(x) for x in str_cases]
516        for url, defrag, frag in str_cases + bytes_cases:
517            result = urllib.parse.urldefrag(url)
518            self.assertEqual(result.geturl(), url)
519            self.assertEqual(result, (defrag, frag))
520            self.assertEqual(result.url, defrag)
521            self.assertEqual(result.fragment, frag)
522
523    def test_urlsplit_attributes(self):
524        url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
525        p = urllib.parse.urlsplit(url)
526        self.assertEqual(p.scheme, "http")
527        self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
528        self.assertEqual(p.path, "/doc/")
529        self.assertEqual(p.query, "")
530        self.assertEqual(p.fragment, "frag")
531        self.assertEqual(p.username, None)
532        self.assertEqual(p.password, None)
533        self.assertEqual(p.hostname, "www.python.org")
534        self.assertEqual(p.port, None)
535        # geturl() won't return exactly the original URL in this case
536        # since the scheme is always case-normalized
537        # We handle this by ignoring the first 4 characters of the URL
538        self.assertEqual(p.geturl()[4:], url[4:])
539
540        url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
541        p = urllib.parse.urlsplit(url)
542        self.assertEqual(p.scheme, "http")
543        self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
544        self.assertEqual(p.path, "/doc/")
545        self.assertEqual(p.query, "query=yes")
546        self.assertEqual(p.fragment, "frag")
547        self.assertEqual(p.username, "User")
548        self.assertEqual(p.password, "Pass")
549        self.assertEqual(p.hostname, "www.python.org")
550        self.assertEqual(p.port, 80)
551        self.assertEqual(p.geturl(), url)
552
553        # Addressing issue1698, which suggests Username can contain
554        # "@" characters.  Though not RFC compliant, many ftp sites allow
555        # and request email addresses as usernames.
556
557        url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
558        p = urllib.parse.urlsplit(url)
559        self.assertEqual(p.scheme, "http")
560        self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
561        self.assertEqual(p.path, "/doc/")
562        self.assertEqual(p.query, "query=yes")
563        self.assertEqual(p.fragment, "frag")
564        self.assertEqual(p.username, "User@example.com")
565        self.assertEqual(p.password, "Pass")
566        self.assertEqual(p.hostname, "www.python.org")
567        self.assertEqual(p.port, 80)
568        self.assertEqual(p.geturl(), url)
569
570        # And check them all again, only with bytes this time
571        url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"
572        p = urllib.parse.urlsplit(url)
573        self.assertEqual(p.scheme, b"http")
574        self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
575        self.assertEqual(p.path, b"/doc/")
576        self.assertEqual(p.query, b"")
577        self.assertEqual(p.fragment, b"frag")
578        self.assertEqual(p.username, None)
579        self.assertEqual(p.password, None)
580        self.assertEqual(p.hostname, b"www.python.org")
581        self.assertEqual(p.port, None)
582        self.assertEqual(p.geturl()[4:], url[4:])
583
584        url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"
585        p = urllib.parse.urlsplit(url)
586        self.assertEqual(p.scheme, b"http")
587        self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
588        self.assertEqual(p.path, b"/doc/")
589        self.assertEqual(p.query, b"query=yes")
590        self.assertEqual(p.fragment, b"frag")
591        self.assertEqual(p.username, b"User")
592        self.assertEqual(p.password, b"Pass")
593        self.assertEqual(p.hostname, b"www.python.org")
594        self.assertEqual(p.port, 80)
595        self.assertEqual(p.geturl(), url)
596
597        url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
598        p = urllib.parse.urlsplit(url)
599        self.assertEqual(p.scheme, b"http")
600        self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080")
601        self.assertEqual(p.path, b"/doc/")
602        self.assertEqual(p.query, b"query=yes")
603        self.assertEqual(p.fragment, b"frag")
604        self.assertEqual(p.username, b"User@example.com")
605        self.assertEqual(p.password, b"Pass")
606        self.assertEqual(p.hostname, b"www.python.org")
607        self.assertEqual(p.port, 80)
608        self.assertEqual(p.geturl(), url)
609
610        # Verify an illegal port raises ValueError
611        url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"
612        p = urllib.parse.urlsplit(url)
613        with self.assertRaisesRegex(ValueError, "out of range"):
614            p.port
615
616    def test_attributes_bad_port(self):
617        """Check handling of invalid ports."""
618        for bytes in (False, True):
619            for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
620                for port in ("foo", "1.5", "-1", "0x10"):
621                    with self.subTest(bytes=bytes, parse=parse, port=port):
622                        netloc = "www.example.net:" + port
623                        url = "http://" + netloc
624                        if bytes:
625                            netloc = netloc.encode("ascii")
626                            url = url.encode("ascii")
627                        p = parse(url)
628                        self.assertEqual(p.netloc, netloc)
629                        with self.assertRaises(ValueError):
630                            p.port
631
632    def test_attributes_without_netloc(self):
633        # This example is straight from RFC 3261.  It looks like it
634        # should allow the username, hostname, and port to be filled
635        # in, but doesn't.  Since it's a URI and doesn't use the
636        # scheme://netloc syntax, the netloc and related attributes
637        # should be left empty.
638        uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
639        p = urllib.parse.urlsplit(uri)
640        self.assertEqual(p.netloc, "")
641        self.assertEqual(p.username, None)
642        self.assertEqual(p.password, None)
643        self.assertEqual(p.hostname, None)
644        self.assertEqual(p.port, None)
645        self.assertEqual(p.geturl(), uri)
646
647        p = urllib.parse.urlparse(uri)
648        self.assertEqual(p.netloc, "")
649        self.assertEqual(p.username, None)
650        self.assertEqual(p.password, None)
651        self.assertEqual(p.hostname, None)
652        self.assertEqual(p.port, None)
653        self.assertEqual(p.geturl(), uri)
654
655        # You guessed it, repeating the test with bytes input
656        uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
657        p = urllib.parse.urlsplit(uri)
658        self.assertEqual(p.netloc, b"")
659        self.assertEqual(p.username, None)
660        self.assertEqual(p.password, None)
661        self.assertEqual(p.hostname, None)
662        self.assertEqual(p.port, None)
663        self.assertEqual(p.geturl(), uri)
664
665        p = urllib.parse.urlparse(uri)
666        self.assertEqual(p.netloc, b"")
667        self.assertEqual(p.username, None)
668        self.assertEqual(p.password, None)
669        self.assertEqual(p.hostname, None)
670        self.assertEqual(p.port, None)
671        self.assertEqual(p.geturl(), uri)
672
673    def test_noslash(self):
674        # Issue 1637: http://foo.com?query is legal
675        self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
676                         ('http', 'example.com', '', '', 'blahblah=/foo', ''))
677        self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
678                         (b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))
679
680    def test_withoutscheme(self):
681        # Test urlparse without scheme
682        # Issue 754016: urlparse goes wrong with IP:port without scheme
683        # RFC 1808 specifies that netloc should start with //, urlparse expects
684        # the same, otherwise it classifies the portion of url as path.
685        self.assertEqual(urllib.parse.urlparse("path"),
686                ('','','path','','',''))
687        self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
688                ('','www.python.org:80','','','',''))
689        self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
690                ('http','www.python.org:80','','','',''))
691        # Repeat for bytes input
692        self.assertEqual(urllib.parse.urlparse(b"path"),
693                (b'',b'',b'path',b'',b'',b''))
694        self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
695                (b'',b'www.python.org:80',b'',b'',b'',b''))
696        self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
697                (b'http',b'www.python.org:80',b'',b'',b'',b''))
698
699    def test_portseparator(self):
700        # Issue 754016 makes changes for port separator ':' from scheme separator
701        self.assertEqual(urllib.parse.urlparse("path:80"),
702                ('','','path:80','','',''))
703        self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
704        self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
705        self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
706                ('http','www.python.org:80','','','',''))
707        # As usual, need to check bytes input as well
708        self.assertEqual(urllib.parse.urlparse(b"path:80"),
709                (b'',b'',b'path:80',b'',b'',b''))
710        self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
711        self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
712        self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
713                (b'http',b'www.python.org:80',b'',b'',b'',b''))
714
715    def test_usingsys(self):
716        # Issue 3314: sys module is used in the error
717        self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
718
719    def test_anyscheme(self):
720        # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
721        self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
722                         ('s3', 'foo.com', '/stuff', '', '', ''))
723        self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
724                         ('x-newscheme', 'foo.com', '/stuff', '', '', ''))
725        self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
726                         ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
727        self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
728                         ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))
729
730        # And for bytes...
731        self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
732                         (b's3', b'foo.com', b'/stuff', b'', b'', b''))
733        self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
734                         (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
735        self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
736                         (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
737        self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
738                         (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))
739
740    def test_default_scheme(self):
741        # Exercise the scheme parameter of urlparse() and urlsplit()
742        for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
743            with self.subTest(function=func):
744                result = func("http://example.net/", "ftp")
745                self.assertEqual(result.scheme, "http")
746                result = func(b"http://example.net/", b"ftp")
747                self.assertEqual(result.scheme, b"http")
748                self.assertEqual(func("path", "ftp").scheme, "ftp")
749                self.assertEqual(func("path", scheme="ftp").scheme, "ftp")
750                self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp")
751                self.assertEqual(func("path").scheme, "")
752                self.assertEqual(func(b"path").scheme, b"")
753                self.assertEqual(func(b"path", "").scheme, b"")
754
755    def test_parse_fragments(self):
756        # Exercise the allow_fragments parameter of urlparse() and urlsplit()
757        tests = (
758            ("http:#frag", "path"),
759            ("//example.net#frag", "path"),
760            ("index.html#frag", "path"),
761            (";a=b#frag", "params"),
762            ("?a=b#frag", "query"),
763            ("#frag", "path"),
764        )
765        for url, attr in tests:
766            for func in (urllib.parse.urlparse, urllib.parse.urlsplit):
767                if attr == "params" and func is urllib.parse.urlsplit:
768                    attr = "path"
769                with self.subTest(url=url, function=func):
770                    result = func(url, allow_fragments=False)
771                    self.assertEqual(result.fragment, "")
772                    self.assertTrue(getattr(result, attr).endswith("#frag"))
773                    self.assertEqual(func(url, "", False).fragment, "")
774
775                    result = func(url, allow_fragments=True)
776                    self.assertEqual(result.fragment, "frag")
777                    self.assertFalse(getattr(result, attr).endswith("frag"))
778                    self.assertEqual(func(url, "", True).fragment, "frag")
779                    self.assertEqual(func(url).fragment, "frag")
780
781    def test_mixed_types_rejected(self):
782        # Several functions that process either strings or ASCII encoded bytes
783        # accept multiple arguments. Check they reject mixed type input
784        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
785            urllib.parse.urlparse("www.python.org", b"http")
786        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
787            urllib.parse.urlparse(b"www.python.org", "http")
788        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
789            urllib.parse.urlsplit("www.python.org", b"http")
790        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
791            urllib.parse.urlsplit(b"www.python.org", "http")
792        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
793            urllib.parse.urlunparse(( b"http", "www.python.org","","","",""))
794        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
795            urllib.parse.urlunparse(("http", b"www.python.org","","","",""))
796        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
797            urllib.parse.urlunsplit((b"http", "www.python.org","","",""))
798        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
799            urllib.parse.urlunsplit(("http", b"www.python.org","","",""))
800        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
801            urllib.parse.urljoin("http://python.org", b"http://python.org")
802        with self.assertRaisesRegex(TypeError, "Cannot mix str"):
803            urllib.parse.urljoin(b"http://python.org", "http://python.org")
804
805    def _check_result_type(self, str_type):
806        num_args = len(str_type._fields)
807        bytes_type = str_type._encoded_counterpart
808        self.assertIs(bytes_type._decoded_counterpart, str_type)
809        str_args = ('',) * num_args
810        bytes_args = (b'',) * num_args
811        str_result = str_type(*str_args)
812        bytes_result = bytes_type(*bytes_args)
813        encoding = 'ascii'
814        errors = 'strict'
815        self.assertEqual(str_result, str_args)
816        self.assertEqual(bytes_result.decode(), str_args)
817        self.assertEqual(bytes_result.decode(), str_result)
818        self.assertEqual(bytes_result.decode(encoding), str_args)
819        self.assertEqual(bytes_result.decode(encoding), str_result)
820        self.assertEqual(bytes_result.decode(encoding, errors), str_args)
821        self.assertEqual(bytes_result.decode(encoding, errors), str_result)
822        self.assertEqual(bytes_result, bytes_args)
823        self.assertEqual(str_result.encode(), bytes_args)
824        self.assertEqual(str_result.encode(), bytes_result)
825        self.assertEqual(str_result.encode(encoding), bytes_args)
826        self.assertEqual(str_result.encode(encoding), bytes_result)
827        self.assertEqual(str_result.encode(encoding, errors), bytes_args)
828        self.assertEqual(str_result.encode(encoding, errors), bytes_result)
829
830    def test_result_pairs(self):
831        # Check encoding and decoding between result pairs
832        result_types = [
833          urllib.parse.DefragResult,
834          urllib.parse.SplitResult,
835          urllib.parse.ParseResult,
836        ]
837        for result_type in result_types:
838            self._check_result_type(result_type)
839
840    def test_parse_qs_encoding(self):
841        result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
842        self.assertEqual(result, {'key': ['\u0141\xE9']})
843        result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8")
844        self.assertEqual(result, {'key': ['\u0141\xE9']})
845        result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii")
846        self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']})
847        result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii")
848        self.assertEqual(result, {'key': ['\u0141\ufffd-']})
849        result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii",
850                                                          errors="ignore")
851        self.assertEqual(result, {'key': ['\u0141-']})
852
853    def test_parse_qsl_encoding(self):
854        result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1")
855        self.assertEqual(result, [('key', '\u0141\xE9')])
856        result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8")
857        self.assertEqual(result, [('key', '\u0141\xE9')])
858        result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii")
859        self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')])
860        result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii")
861        self.assertEqual(result, [('key', '\u0141\ufffd-')])
862        result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii",
863                                                          errors="ignore")
864        self.assertEqual(result, [('key', '\u0141-')])
865
866    def test_urlencode_sequences(self):
867        # Other tests incidentally urlencode things; test non-covered cases:
868        # Sequence and object values.
869        result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True)
870        # we cannot rely on ordering here
871        assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'}
872
873        class Trivial:
874            def __str__(self):
875                return 'trivial'
876
877        result = urllib.parse.urlencode({'a': Trivial()}, True)
878        self.assertEqual(result, 'a=trivial')
879
880    def test_urlencode_quote_via(self):
881        result = urllib.parse.urlencode({'a': 'some value'})
882        self.assertEqual(result, "a=some+value")
883        result = urllib.parse.urlencode({'a': 'some value/another'},
884                                        quote_via=urllib.parse.quote)
885        self.assertEqual(result, "a=some%20value%2Fanother")
886        result = urllib.parse.urlencode({'a': 'some value/another'},
887                                        safe='/', quote_via=urllib.parse.quote)
888        self.assertEqual(result, "a=some%20value/another")
889
890    def test_quote_from_bytes(self):
891        self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo')
892        result = urllib.parse.quote_from_bytes(b'archaeological arcana')
893        self.assertEqual(result, 'archaeological%20arcana')
894        result = urllib.parse.quote_from_bytes(b'')
895        self.assertEqual(result, '')
896
897    def test_unquote_to_bytes(self):
898        result = urllib.parse.unquote_to_bytes('abc%20def')
899        self.assertEqual(result, b'abc def')
900        result = urllib.parse.unquote_to_bytes('')
901        self.assertEqual(result, b'')
902
903    def test_quote_errors(self):
904        self.assertRaises(TypeError, urllib.parse.quote, b'foo',
905                          encoding='utf-8')
906        self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict')
907
908    def test_issue14072(self):
909        p1 = urllib.parse.urlsplit('tel:+31-641044153')
910        self.assertEqual(p1.scheme, 'tel')
911        self.assertEqual(p1.path, '+31-641044153')
912        p2 = urllib.parse.urlsplit('tel:+31641044153')
913        self.assertEqual(p2.scheme, 'tel')
914        self.assertEqual(p2.path, '+31641044153')
915        # assert the behavior for urlparse
916        p1 = urllib.parse.urlparse('tel:+31-641044153')
917        self.assertEqual(p1.scheme, 'tel')
918        self.assertEqual(p1.path, '+31-641044153')
919        p2 = urllib.parse.urlparse('tel:+31641044153')
920        self.assertEqual(p2.scheme, 'tel')
921        self.assertEqual(p2.path, '+31641044153')
922
923    def test_telurl_params(self):
924        p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516')
925        self.assertEqual(p1.scheme, 'tel')
926        self.assertEqual(p1.path, '123-4')
927        self.assertEqual(p1.params, 'phone-context=+1-650-516')
928
929        p1 = urllib.parse.urlparse('tel:+1-201-555-0123')
930        self.assertEqual(p1.scheme, 'tel')
931        self.assertEqual(p1.path, '+1-201-555-0123')
932        self.assertEqual(p1.params, '')
933
934        p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com')
935        self.assertEqual(p1.scheme, 'tel')
936        self.assertEqual(p1.path, '7042')
937        self.assertEqual(p1.params, 'phone-context=example.com')
938
939        p1 = urllib.parse.urlparse('tel:863-1234;phone-context=+1-914-555')
940        self.assertEqual(p1.scheme, 'tel')
941        self.assertEqual(p1.path, '863-1234')
942        self.assertEqual(p1.params, 'phone-context=+1-914-555')
943
944    def test_Quoter_repr(self):
945        quoter = urllib.parse.Quoter(urllib.parse._ALWAYS_SAFE)
946        self.assertIn('Quoter', repr(quoter))
947
948    def test_all(self):
949        expected = []
950        undocumented = {
951            'splitattr', 'splithost', 'splitnport', 'splitpasswd',
952            'splitport', 'splitquery', 'splittag', 'splittype', 'splituser',
953            'splitvalue',
954            'Quoter', 'ResultBase', 'clear_cache', 'to_bytes', 'unwrap',
955        }
956        for name in dir(urllib.parse):
957            if name.startswith('_') or name in undocumented:
958                continue
959            object = getattr(urllib.parse, name)
960            if getattr(object, '__module__', None) == 'urllib.parse':
961                expected.append(name)
962        self.assertCountEqual(urllib.parse.__all__, expected)
963
964
class Utility_Tests(unittest.TestCase):
    """Test case covering the utility functions of urllib.parse."""
    # In Python 2 this test class was in test_urllib.

    def test_splittype(self):
        cases = (
            ('type:opaquestring', ('type', 'opaquestring')),
            ('opaquestring', (None, 'opaquestring')),
            (':opaquestring', (None, ':opaquestring')),
            ('type:', ('type', '')),
            ('type:opaque:string', ('type', 'opaque:string')),
        )
        for url, expected in cases:
            self.assertEqual(urllib.parse.splittype(url), expected)

    def test_splithost(self):
        cases = (
            ('//www.example.org:80/foo/bar/baz.html',
             ('www.example.org:80', '/foo/bar/baz.html')),
            ('//www.example.org:80', ('www.example.org:80', '')),
            ('/foo/bar/baz.html', (None, '/foo/bar/baz.html')),
        )
        for url, expected in cases:
            self.assertEqual(urllib.parse.splithost(url), expected)

    def test_splituser(self):
        cases = (
            ('User:Pass@www.python.org:080',
             ('User:Pass', 'www.python.org:080')),
            ('@www.python.org:080', ('', 'www.python.org:080')),
            ('www.python.org:080', (None, 'www.python.org:080')),
            ('User:Pass@', ('User:Pass', '')),
            ('User@example.com:Pass@www.python.org:080',
             ('User@example.com:Pass', 'www.python.org:080')),
        )
        for host, expected in cases:
            self.assertEqual(urllib.parse.splituser(host), expected)

    def test_splitpasswd(self):
        # Some of these passwords are not sensible, but they are kept to
        # conform to RFC2617 and to address issue4675.
        cases = (
            ('user:ab', ('user', 'ab')),
            ('user:a\nb', ('user', 'a\nb')),
            ('user:a\tb', ('user', 'a\tb')),
            ('user:a\rb', ('user', 'a\rb')),
            ('user:a\fb', ('user', 'a\fb')),
            ('user:a\vb', ('user', 'a\vb')),
            ('user:a:b', ('user', 'a:b')),
            ('user:a b', ('user', 'a b')),
            ('user 2:ab', ('user 2', 'ab')),
            ('user+1:a+b', ('user+1', 'a+b')),
            ('user:', ('user', '')),
            ('user', ('user', None)),
            (':ab', ('', 'ab')),
        )
        for credentials, expected in cases:
            self.assertEqual(urllib.parse.splitpasswd(credentials), expected)

    def test_splitport(self):
        cases = (
            ('parrot:88', ('parrot', '88')),
            ('parrot', ('parrot', None)),
            ('parrot:', ('parrot', None)),
            ('127.0.0.1', ('127.0.0.1', None)),
            ('parrot:cheese', ('parrot:cheese', None)),
            ('[::1]:88', ('[::1]', '88')),
            ('[::1]', ('[::1]', None)),
            (':88', ('', '88')),
        )
        for host, expected in cases:
            self.assertEqual(urllib.parse.splitport(host), expected)

    def test_splitnport(self):
        # Each entry holds the positional arguments (host, optionally a
        # default port) followed by the expected result.
        cases = (
            (('parrot:88',), ('parrot', 88)),
            (('parrot',), ('parrot', -1)),
            (('parrot', 55), ('parrot', 55)),
            (('parrot:',), ('parrot', -1)),
            (('parrot:', 55), ('parrot', 55)),
            (('127.0.0.1',), ('127.0.0.1', -1)),
            (('127.0.0.1', 55), ('127.0.0.1', 55)),
            (('parrot:cheese',), ('parrot', None)),
            (('parrot:cheese', 55), ('parrot', None)),
        )
        for args, expected in cases:
            self.assertEqual(urllib.parse.splitnport(*args), expected)

    def test_splitquery(self):
        # Normal cases are exercised by other tests; ensure that we also
        # catch cases with no port specified (testcase ensuring coverage)
        cases = (
            ('http://python.org/fake?foo=bar',
             ('http://python.org/fake', 'foo=bar')),
            ('http://python.org/fake?foo=bar?',
             ('http://python.org/fake?foo=bar', '')),
            ('http://python.org/fake', ('http://python.org/fake', None)),
            ('?foo=bar', ('', 'foo=bar')),
        )
        for url, expected in cases:
            self.assertEqual(urllib.parse.splitquery(url), expected)

    def test_splittag(self):
        cases = (
            ('http://example.com?foo=bar#baz',
             ('http://example.com?foo=bar', 'baz')),
            ('http://example.com?foo=bar#',
             ('http://example.com?foo=bar', '')),
            ('#baz', ('', 'baz')),
            ('http://example.com?foo=bar',
             ('http://example.com?foo=bar', None)),
            ('http://example.com?foo=bar#baz#boo',
             ('http://example.com?foo=bar#baz', 'boo')),
        )
        for url, expected in cases:
            self.assertEqual(urllib.parse.splittag(url), expected)

    def test_splitattr(self):
        cases = (
            ('/path;attr1=value1;attr2=value2',
             ('/path', ['attr1=value1', 'attr2=value2'])),
            ('/path;', ('/path', [''])),
            (';attr1=value1;attr2=value2',
             ('', ['attr1=value1', 'attr2=value2'])),
            ('/path', ('/path', [])),
        )
        for url, expected in cases:
            self.assertEqual(urllib.parse.splitattr(url), expected)

    def test_splitvalue(self):
        # Normal cases are exercised by other tests; test pathological cases
        # with no key/value pairs. (testcase ensuring coverage)
        cases = (
            ('foo=bar', ('foo', 'bar')),
            ('foo=', ('foo', '')),
            ('=bar', ('', 'bar')),
            ('foobar', ('foobar', None)),
            ('foo=bar=baz', ('foo', 'bar=baz')),
        )
        for attr, expected in cases:
            self.assertEqual(urllib.parse.splitvalue(attr), expected)

    def test_to_bytes(self):
        # An ASCII URL is returned unchanged; a non-ASCII one is rejected.
        converted = urllib.parse.to_bytes('http://www.python.org')
        self.assertEqual(converted, 'http://www.python.org')
        with self.assertRaises(UnicodeError):
            urllib.parse.to_bytes('http://www.python.org/medi\u00e6val')

    def test_unwrap(self):
        # The "<URL:...>" wrapper is removed, leaving the bare URL.
        unwrapped = urllib.parse.unwrap('<URL:type://host/path>')
        self.assertEqual(unwrapped, 'type://host/path')
1092
1093
# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
1096