1import unittest 2import urllib.parse 3 4RFC1808_BASE = "http://a/b/c/d;p?q#f" 5RFC2396_BASE = "http://a/b/c/d;p?q" 6RFC3986_BASE = 'http://a/b/c/d;p?q' 7SIMPLE_BASE = 'http://a/b/c/d' 8 9# A list of test cases. Each test case is a two-tuple that contains 10# a string with the query and a dictionary with the expected result. 11 12parse_qsl_test_cases = [ 13 ("", []), 14 ("&", []), 15 ("&&", []), 16 ("=", [('', '')]), 17 ("=a", [('', 'a')]), 18 ("a", [('a', '')]), 19 ("a=", [('a', '')]), 20 ("&a=b", [('a', 'b')]), 21 ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]), 22 ("a=1&a=2", [('a', '1'), ('a', '2')]), 23 (b"", []), 24 (b"&", []), 25 (b"&&", []), 26 (b"=", [(b'', b'')]), 27 (b"=a", [(b'', b'a')]), 28 (b"a", [(b'a', b'')]), 29 (b"a=", [(b'a', b'')]), 30 (b"&a=b", [(b'a', b'b')]), 31 (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]), 32 (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]), 33 (";", []), 34 (";;", []), 35 (";a=b", [('a', 'b')]), 36 ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]), 37 ("a=1;a=2", [('a', '1'), ('a', '2')]), 38 (b";", []), 39 (b";;", []), 40 (b";a=b", [(b'a', b'b')]), 41 (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]), 42 (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]), 43] 44 45parse_qs_test_cases = [ 46 ("", {}), 47 ("&", {}), 48 ("&&", {}), 49 ("=", {'': ['']}), 50 ("=a", {'': ['a']}), 51 ("a", {'a': ['']}), 52 ("a=", {'a': ['']}), 53 ("&a=b", {'a': ['b']}), 54 ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}), 55 ("a=1&a=2", {'a': ['1', '2']}), 56 (b"", {}), 57 (b"&", {}), 58 (b"&&", {}), 59 (b"=", {b'': [b'']}), 60 (b"=a", {b'': [b'a']}), 61 (b"a", {b'a': [b'']}), 62 (b"a=", {b'a': [b'']}), 63 (b"&a=b", {b'a': [b'b']}), 64 (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}), 65 (b"a=1&a=2", {b'a': [b'1', b'2']}), 66 (";", {}), 67 (";;", {}), 68 (";a=b", {'a': ['b']}), 69 ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}), 70 ("a=1;a=2", {'a': ['1', '2']}), 71 (b";", {}), 72 (b";;", {}), 73 (b";a=b", {b'a': [b'b']}), 74 (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}), 75 (b"a=1;a=2", {b'a': [b'1', b'2']}), 76] 77 78class UrlParseTestCase(unittest.TestCase): 79 80 def checkRoundtrips(self, url, parsed, split): 81 result = urllib.parse.urlparse(url) 82 self.assertEqual(result, parsed) 83 t = (result.scheme, result.netloc, result.path, 84 result.params, result.query, result.fragment) 85 self.assertEqual(t, parsed) 86 # put it back together and it should be the same 87 result2 = urllib.parse.urlunparse(result) 88 self.assertEqual(result2, url) 89 self.assertEqual(result2, result.geturl()) 90 91 # the result of geturl() is a fixpoint; we can always parse it 92 # again to get the same result: 93 result3 = urllib.parse.urlparse(result.geturl()) 94 self.assertEqual(result3.geturl(), result.geturl()) 95 self.assertEqual(result3, result) 96 self.assertEqual(result3.scheme, result.scheme) 97 self.assertEqual(result3.netloc, result.netloc) 98 self.assertEqual(result3.path, result.path) 99 self.assertEqual(result3.params, result.params) 100 self.assertEqual(result3.query, result.query) 101 self.assertEqual(result3.fragment, result.fragment) 102 self.assertEqual(result3.username, result.username) 103 self.assertEqual(result3.password, result.password) 104 self.assertEqual(result3.hostname, result.hostname) 105 self.assertEqual(result3.port, result.port) 106 107 # check the roundtrip using urlsplit() as well 108 result = urllib.parse.urlsplit(url) 109 self.assertEqual(result, split) 110 t = (result.scheme, result.netloc, result.path, 111 result.query, result.fragment) 112 self.assertEqual(t, split) 113 result2 = urllib.parse.urlunsplit(result) 114 self.assertEqual(result2, url) 115 self.assertEqual(result2, result.geturl()) 116 117 # check the fixpoint property of re-parsing the result of geturl() 118 result3 = urllib.parse.urlsplit(result.geturl()) 119 self.assertEqual(result3.geturl(), result.geturl()) 120 self.assertEqual(result3, result) 121 self.assertEqual(result3.scheme, result.scheme) 122 self.assertEqual(result3.netloc, result.netloc) 123 self.assertEqual(result3.path, result.path) 124 self.assertEqual(result3.query, result.query) 125 self.assertEqual(result3.fragment, result.fragment) 126 self.assertEqual(result3.username, result.username) 127 self.assertEqual(result3.password, result.password) 128 self.assertEqual(result3.hostname, result.hostname) 129 self.assertEqual(result3.port, result.port) 130 131 def test_qsl(self): 132 for orig, expect in parse_qsl_test_cases: 133 result = urllib.parse.parse_qsl(orig, keep_blank_values=True) 134 self.assertEqual(result, expect, "Error parsing %r" % orig) 135 expect_without_blanks = [v for v in expect if len(v[1])] 136 result = urllib.parse.parse_qsl(orig, keep_blank_values=False) 137 self.assertEqual(result, expect_without_blanks, 138 "Error parsing %r" % orig) 139 140 def test_qs(self): 141 for orig, expect in parse_qs_test_cases: 142 result = urllib.parse.parse_qs(orig, keep_blank_values=True) 143 self.assertEqual(result, expect, "Error parsing %r" % orig) 144 expect_without_blanks = {v: expect[v] 145 for v in expect if len(expect[v][0])} 146 result = urllib.parse.parse_qs(orig, keep_blank_values=False) 147 self.assertEqual(result, expect_without_blanks, 148 "Error parsing %r" % orig) 149 150 def test_roundtrips(self): 151 str_cases = [ 152 ('file:///tmp/junk.txt', 153 ('file', '', '/tmp/junk.txt', '', '', ''), 154 ('file', '', '/tmp/junk.txt', '', '')), 155 ('imap://mail.python.org/mbox1', 156 ('imap', 'mail.python.org', '/mbox1', '', '', ''), 157 ('imap', 'mail.python.org', '/mbox1', '', '')), 158 ('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf', 159 ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf', 160 '', '', ''), 161 ('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf', 162 '', '')), 163 ('nfs://server/path/to/file.txt', 164 ('nfs', 'server', '/path/to/file.txt', '', '', ''), 165 ('nfs', 'server', '/path/to/file.txt', '', '')), 166 ('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/', 167 ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/', 168 '', '', ''), 169 ('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/', 170 '', '')), 171 ('git+ssh://git@github.com/user/project.git', 172 ('git+ssh', 'git@github.com','/user/project.git', 173 '','',''), 174 ('git+ssh', 'git@github.com','/user/project.git', 175 '', '')), 176 ] 177 def _encode(t): 178 return (t[0].encode('ascii'), 179 tuple(x.encode('ascii') for x in t[1]), 180 tuple(x.encode('ascii') for x in t[2])) 181 bytes_cases = [_encode(x) for x in str_cases] 182 for url, parsed, split in str_cases + bytes_cases: 183 self.checkRoundtrips(url, parsed, split) 184 185 def test_http_roundtrips(self): 186 # urllib.parse.urlsplit treats 'http:' as an optimized special case, 187 # so we test both 'http:' and 'https:' in all the following. 188 # Three cheers for white box knowledge! 189 str_cases = [ 190 ('://www.python.org', 191 ('www.python.org', '', '', '', ''), 192 ('www.python.org', '', '', '')), 193 ('://www.python.org#abc', 194 ('www.python.org', '', '', '', 'abc'), 195 ('www.python.org', '', '', 'abc')), 196 ('://www.python.org?q=abc', 197 ('www.python.org', '', '', 'q=abc', ''), 198 ('www.python.org', '', 'q=abc', '')), 199 ('://www.python.org/#abc', 200 ('www.python.org', '/', '', '', 'abc'), 201 ('www.python.org', '/', '', 'abc')), 202 ('://a/b/c/d;p?q#f', 203 ('a', '/b/c/d', 'p', 'q', 'f'), 204 ('a', '/b/c/d;p', 'q', 'f')), 205 ] 206 def _encode(t): 207 return (t[0].encode('ascii'), 208 tuple(x.encode('ascii') for x in t[1]), 209 tuple(x.encode('ascii') for x in t[2])) 210 bytes_cases = [_encode(x) for x in str_cases] 211 str_schemes = ('http', 'https') 212 bytes_schemes = (b'http', b'https') 213 str_tests = str_schemes, str_cases 214 bytes_tests = bytes_schemes, bytes_cases 215 for schemes, test_cases in (str_tests, bytes_tests): 216 for scheme in schemes: 217 for url, parsed, split in test_cases: 218 url = scheme + url 219 parsed = (scheme,) + parsed 220 split = (scheme,) + split 221 self.checkRoundtrips(url, parsed, split) 222 223 def checkJoin(self, base, relurl, expected): 224 str_components = (base, relurl, expected) 225 self.assertEqual(urllib.parse.urljoin(base, relurl), expected) 226 bytes_components = baseb, relurlb, expectedb = [ 227 x.encode('ascii') for x in str_components] 228 self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb) 229 230 def test_unparse_parse(self): 231 str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',] 232 bytes_cases = [x.encode('ascii') for x in str_cases] 233 for u in str_cases + bytes_cases: 234 self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u) 235 self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u) 236 237 def test_RFC1808(self): 238 # "normal" cases from RFC 1808: 239 self.checkJoin(RFC1808_BASE, 'g:h', 'g:h') 240 self.checkJoin(RFC1808_BASE, 'g', 'http://a/b/c/g') 241 self.checkJoin(RFC1808_BASE, './g', 'http://a/b/c/g') 242 self.checkJoin(RFC1808_BASE, 'g/', 'http://a/b/c/g/') 243 self.checkJoin(RFC1808_BASE, '/g', 'http://a/g') 244 self.checkJoin(RFC1808_BASE, '//g', 'http://g') 245 self.checkJoin(RFC1808_BASE, 'g?y', 'http://a/b/c/g?y') 246 self.checkJoin(RFC1808_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x') 247 self.checkJoin(RFC1808_BASE, '#s', 'http://a/b/c/d;p?q#s') 248 self.checkJoin(RFC1808_BASE, 'g#s', 'http://a/b/c/g#s') 249 self.checkJoin(RFC1808_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x') 250 self.checkJoin(RFC1808_BASE, 'g?y#s', 'http://a/b/c/g?y#s') 251 self.checkJoin(RFC1808_BASE, 'g;x', 'http://a/b/c/g;x') 252 self.checkJoin(RFC1808_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s') 253 self.checkJoin(RFC1808_BASE, '.', 'http://a/b/c/') 254 self.checkJoin(RFC1808_BASE, './', 'http://a/b/c/') 255 self.checkJoin(RFC1808_BASE, '..', 'http://a/b/') 256 self.checkJoin(RFC1808_BASE, '../', 'http://a/b/') 257 self.checkJoin(RFC1808_BASE, '../g', 'http://a/b/g') 258 self.checkJoin(RFC1808_BASE, '../..', 'http://a/') 259 self.checkJoin(RFC1808_BASE, '../../', 'http://a/') 260 self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g') 261 262 # "abnormal" cases from RFC 1808: 263 self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f') 264 self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.') 265 self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g') 266 self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..') 267 self.checkJoin(RFC1808_BASE, '..g', 'http://a/b/c/..g') 268 self.checkJoin(RFC1808_BASE, './../g', 'http://a/b/g') 269 self.checkJoin(RFC1808_BASE, './g/.', 'http://a/b/c/g/') 270 self.checkJoin(RFC1808_BASE, 'g/./h', 'http://a/b/c/g/h') 271 self.checkJoin(RFC1808_BASE, 'g/../h', 'http://a/b/c/h') 272 273 # RFC 1808 and RFC 1630 disagree on these (according to RFC 1808), 274 # so we'll not actually run these tests (which expect 1808 behavior). 275 #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g') 276 #self.checkJoin(RFC1808_BASE, 'http:', 'http:') 277 278 # XXX: The following tests are no longer compatible with RFC3986 279 # self.checkJoin(RFC1808_BASE, '../../../g', 'http://a/../g') 280 # self.checkJoin(RFC1808_BASE, '../../../../g', 'http://a/../../g') 281 # self.checkJoin(RFC1808_BASE, '/./g', 'http://a/./g') 282 # self.checkJoin(RFC1808_BASE, '/../g', 'http://a/../g') 283 284 285 def test_RFC2368(self): 286 # Issue 11467: path that starts with a number is not parsed correctly 287 self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'), 288 ('mailto', '', '1337@example.org', '', '', '')) 289 290 def test_RFC2396(self): 291 # cases from RFC 2396 292 293 294 self.checkJoin(RFC2396_BASE, 'g:h', 'g:h') 295 self.checkJoin(RFC2396_BASE, 'g', 'http://a/b/c/g') 296 self.checkJoin(RFC2396_BASE, './g', 'http://a/b/c/g') 297 self.checkJoin(RFC2396_BASE, 'g/', 'http://a/b/c/g/') 298 self.checkJoin(RFC2396_BASE, '/g', 'http://a/g') 299 self.checkJoin(RFC2396_BASE, '//g', 'http://g') 300 self.checkJoin(RFC2396_BASE, 'g?y', 'http://a/b/c/g?y') 301 self.checkJoin(RFC2396_BASE, '#s', 'http://a/b/c/d;p?q#s') 302 self.checkJoin(RFC2396_BASE, 'g#s', 'http://a/b/c/g#s') 303 self.checkJoin(RFC2396_BASE, 'g?y#s', 'http://a/b/c/g?y#s') 304 self.checkJoin(RFC2396_BASE, 'g;x', 'http://a/b/c/g;x') 305 self.checkJoin(RFC2396_BASE, 'g;x?y#s', 'http://a/b/c/g;x?y#s') 306 self.checkJoin(RFC2396_BASE, '.', 'http://a/b/c/') 307 self.checkJoin(RFC2396_BASE, './', 'http://a/b/c/') 308 self.checkJoin(RFC2396_BASE, '..', 'http://a/b/') 309 self.checkJoin(RFC2396_BASE, '../', 'http://a/b/') 310 self.checkJoin(RFC2396_BASE, '../g', 'http://a/b/g') 311 self.checkJoin(RFC2396_BASE, '../..', 'http://a/') 312 self.checkJoin(RFC2396_BASE, '../../', 'http://a/') 313 self.checkJoin(RFC2396_BASE, '../../g', 'http://a/g') 314 self.checkJoin(RFC2396_BASE, '', RFC2396_BASE) 315 self.checkJoin(RFC2396_BASE, 'g.', 'http://a/b/c/g.') 316 self.checkJoin(RFC2396_BASE, '.g', 'http://a/b/c/.g') 317 self.checkJoin(RFC2396_BASE, 'g..', 'http://a/b/c/g..') 318 self.checkJoin(RFC2396_BASE, '..g', 'http://a/b/c/..g') 319 self.checkJoin(RFC2396_BASE, './../g', 'http://a/b/g') 320 self.checkJoin(RFC2396_BASE, './g/.', 'http://a/b/c/g/') 321 self.checkJoin(RFC2396_BASE, 'g/./h', 'http://a/b/c/g/h') 322 self.checkJoin(RFC2396_BASE, 'g/../h', 'http://a/b/c/h') 323 self.checkJoin(RFC2396_BASE, 'g;x=1/./y', 'http://a/b/c/g;x=1/y') 324 self.checkJoin(RFC2396_BASE, 'g;x=1/../y', 'http://a/b/c/y') 325 self.checkJoin(RFC2396_BASE, 'g?y/./x', 'http://a/b/c/g?y/./x') 326 self.checkJoin(RFC2396_BASE, 'g?y/../x', 'http://a/b/c/g?y/../x') 327 self.checkJoin(RFC2396_BASE, 'g#s/./x', 'http://a/b/c/g#s/./x') 328 self.checkJoin(RFC2396_BASE, 'g#s/../x', 'http://a/b/c/g#s/../x') 329 330 # XXX: The following tests are no longer compatible with RFC3986 331 # self.checkJoin(RFC2396_BASE, '../../../g', 'http://a/../g') 332 # self.checkJoin(RFC2396_BASE, '../../../../g', 'http://a/../../g') 333 # self.checkJoin(RFC2396_BASE, '/./g', 'http://a/./g') 334 # self.checkJoin(RFC2396_BASE, '/../g', 'http://a/../g') 335 336 337 def test_RFC3986(self): 338 # Test cases from RFC3986 339 self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y') 340 self.checkJoin(RFC3986_BASE, ';x', 'http://a/b/c/;x') 341 self.checkJoin(RFC3986_BASE, 'g:h','g:h') 342 self.checkJoin(RFC3986_BASE, 'g','http://a/b/c/g') 343 self.checkJoin(RFC3986_BASE, './g','http://a/b/c/g') 344 self.checkJoin(RFC3986_BASE, 'g/','http://a/b/c/g/') 345 self.checkJoin(RFC3986_BASE, '/g','http://a/g') 346 self.checkJoin(RFC3986_BASE, '//g','http://g') 347 self.checkJoin(RFC3986_BASE, '?y','http://a/b/c/d;p?y') 348 self.checkJoin(RFC3986_BASE, 'g?y','http://a/b/c/g?y') 349 self.checkJoin(RFC3986_BASE, '#s','http://a/b/c/d;p?q#s') 350 self.checkJoin(RFC3986_BASE, 'g#s','http://a/b/c/g#s') 351 self.checkJoin(RFC3986_BASE, 'g?y#s','http://a/b/c/g?y#s') 352 self.checkJoin(RFC3986_BASE, ';x','http://a/b/c/;x') 353 self.checkJoin(RFC3986_BASE, 'g;x','http://a/b/c/g;x') 354 self.checkJoin(RFC3986_BASE, 'g;x?y#s','http://a/b/c/g;x?y#s') 355 self.checkJoin(RFC3986_BASE, '','http://a/b/c/d;p?q') 356 self.checkJoin(RFC3986_BASE, '.','http://a/b/c/') 357 self.checkJoin(RFC3986_BASE, './','http://a/b/c/') 358 self.checkJoin(RFC3986_BASE, '..','http://a/b/') 359 self.checkJoin(RFC3986_BASE, '../','http://a/b/') 360 self.checkJoin(RFC3986_BASE, '../g','http://a/b/g') 361 self.checkJoin(RFC3986_BASE, '../..','http://a/') 362 self.checkJoin(RFC3986_BASE, '../../','http://a/') 363 self.checkJoin(RFC3986_BASE, '../../g','http://a/g') 364 self.checkJoin(RFC3986_BASE, '../../../g', 'http://a/g') 365 366 #Abnormal Examples 367 368 # The 'abnormal scenarios' are incompatible with RFC2986 parsing 369 # Tests are here for reference. 370 371 self.checkJoin(RFC3986_BASE, '../../../g','http://a/g') 372 self.checkJoin(RFC3986_BASE, '../../../../g','http://a/g') 373 self.checkJoin(RFC3986_BASE, '/./g','http://a/g') 374 self.checkJoin(RFC3986_BASE, '/../g','http://a/g') 375 self.checkJoin(RFC3986_BASE, 'g.','http://a/b/c/g.') 376 self.checkJoin(RFC3986_BASE, '.g','http://a/b/c/.g') 377 self.checkJoin(RFC3986_BASE, 'g..','http://a/b/c/g..') 378 self.checkJoin(RFC3986_BASE, '..g','http://a/b/c/..g') 379 self.checkJoin(RFC3986_BASE, './../g','http://a/b/g') 380 self.checkJoin(RFC3986_BASE, './g/.','http://a/b/c/g/') 381 self.checkJoin(RFC3986_BASE, 'g/./h','http://a/b/c/g/h') 382 self.checkJoin(RFC3986_BASE, 'g/../h','http://a/b/c/h') 383 self.checkJoin(RFC3986_BASE, 'g;x=1/./y','http://a/b/c/g;x=1/y') 384 self.checkJoin(RFC3986_BASE, 'g;x=1/../y','http://a/b/c/y') 385 self.checkJoin(RFC3986_BASE, 'g?y/./x','http://a/b/c/g?y/./x') 386 self.checkJoin(RFC3986_BASE, 'g?y/../x','http://a/b/c/g?y/../x') 387 self.checkJoin(RFC3986_BASE, 'g#s/./x','http://a/b/c/g#s/./x') 388 self.checkJoin(RFC3986_BASE, 'g#s/../x','http://a/b/c/g#s/../x') 389 #self.checkJoin(RFC3986_BASE, 'http:g','http:g') # strict parser 390 self.checkJoin(RFC3986_BASE, 'http:g','http://a/b/c/g') #relaxed parser 391 392 # Test for issue9721 393 self.checkJoin('http://a/b/c/de', ';x','http://a/b/c/;x') 394 395 def test_urljoins(self): 396 self.checkJoin(SIMPLE_BASE, 'g:h','g:h') 397 self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g') 398 self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d') 399 self.checkJoin(SIMPLE_BASE, 'g','http://a/b/c/g') 400 self.checkJoin(SIMPLE_BASE, './g','http://a/b/c/g') 401 self.checkJoin(SIMPLE_BASE, 'g/','http://a/b/c/g/') 402 self.checkJoin(SIMPLE_BASE, '/g','http://a/g') 403 self.checkJoin(SIMPLE_BASE, '//g','http://g') 404 self.checkJoin(SIMPLE_BASE, '?y','http://a/b/c/d?y') 405 self.checkJoin(SIMPLE_BASE, 'g?y','http://a/b/c/g?y') 406 self.checkJoin(SIMPLE_BASE, 'g?y/./x','http://a/b/c/g?y/./x') 407 self.checkJoin(SIMPLE_BASE, '.','http://a/b/c/') 408 self.checkJoin(SIMPLE_BASE, './','http://a/b/c/') 409 self.checkJoin(SIMPLE_BASE, '..','http://a/b/') 410 self.checkJoin(SIMPLE_BASE, '../','http://a/b/') 411 self.checkJoin(SIMPLE_BASE, '../g','http://a/b/g') 412 self.checkJoin(SIMPLE_BASE, '../..','http://a/') 413 self.checkJoin(SIMPLE_BASE, '../../g','http://a/g') 414 self.checkJoin(SIMPLE_BASE, './../g','http://a/b/g') 415 self.checkJoin(SIMPLE_BASE, './g/.','http://a/b/c/g/') 416 self.checkJoin(SIMPLE_BASE, 'g/./h','http://a/b/c/g/h') 417 self.checkJoin(SIMPLE_BASE, 'g/../h','http://a/b/c/h') 418 self.checkJoin(SIMPLE_BASE, 'http:g','http://a/b/c/g') 419 self.checkJoin(SIMPLE_BASE, 'http:','http://a/b/c/d') 420 self.checkJoin(SIMPLE_BASE, 'http:?y','http://a/b/c/d?y') 421 self.checkJoin(SIMPLE_BASE, 'http:g?y','http://a/b/c/g?y') 422 self.checkJoin(SIMPLE_BASE, 'http:g?y/./x','http://a/b/c/g?y/./x') 423 self.checkJoin('http:///', '..','http:///') 424 self.checkJoin('', 'http://a/b/c/g?y/./x','http://a/b/c/g?y/./x') 425 self.checkJoin('', 'http://a/./g', 'http://a/./g') 426 self.checkJoin('svn://pathtorepo/dir1', 'dir2', 'svn://pathtorepo/dir2') 427 self.checkJoin('svn+ssh://pathtorepo/dir1', 'dir2', 'svn+ssh://pathtorepo/dir2') 428 self.checkJoin('ws://a/b','g','ws://a/g') 429 self.checkJoin('wss://a/b','g','wss://a/g') 430 431 # XXX: The following tests are no longer compatible with RFC3986 432 # self.checkJoin(SIMPLE_BASE, '../../../g','http://a/../g') 433 # self.checkJoin(SIMPLE_BASE, '/./g','http://a/./g') 434 435 # test for issue22118 duplicate slashes 436 self.checkJoin(SIMPLE_BASE + '/', 'foo', SIMPLE_BASE + '/foo') 437 438 # Non-RFC-defined tests, covering variations of base and trailing 439 # slashes 440 self.checkJoin('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/') 441 self.checkJoin('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/') 442 self.checkJoin('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/') 443 self.checkJoin('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/') 444 self.checkJoin('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g') 445 self.checkJoin('http://a/b/', '../../f/g/', 'http://a/f/g/') 446 447 # issue 23703: don't duplicate filename 448 self.checkJoin('a', 'b', 'b') 449 450 def test_RFC2732(self): 451 str_cases = [ 452 ('http://Test.python.org:5432/foo/', 'test.python.org', 5432), 453 ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432), 454 ('http://[::1]:5432/foo/', '::1', 5432), 455 ('http://[dead:beef::1]:5432/foo/', 'dead:beef::1', 5432), 456 ('http://[dead:beef::]:5432/foo/', 'dead:beef::', 5432), 457 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:5432/foo/', 458 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', 5432), 459 ('http://[::12.34.56.78]:5432/foo/', '::12.34.56.78', 5432), 460 ('http://[::ffff:12.34.56.78]:5432/foo/', 461 '::ffff:12.34.56.78', 5432), 462 ('http://Test.python.org/foo/', 'test.python.org', None), 463 ('http://12.34.56.78/foo/', '12.34.56.78', None), 464 ('http://[::1]/foo/', '::1', None), 465 ('http://[dead:beef::1]/foo/', 'dead:beef::1', None), 466 ('http://[dead:beef::]/foo/', 'dead:beef::', None), 467 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]/foo/', 468 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None), 469 ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None), 470 ('http://[::ffff:12.34.56.78]/foo/', 471 '::ffff:12.34.56.78', None), 472 ('http://Test.python.org:/foo/', 'test.python.org', None), 473 ('http://12.34.56.78:/foo/', '12.34.56.78', None), 474 ('http://[::1]:/foo/', '::1', None), 475 ('http://[dead:beef::1]:/foo/', 'dead:beef::1', None), 476 ('http://[dead:beef::]:/foo/', 'dead:beef::', None), 477 ('http://[dead:beef:cafe:5417:affe:8FA3:deaf:feed]:/foo/', 478 'dead:beef:cafe:5417:affe:8fa3:deaf:feed', None), 479 ('http://[::12.34.56.78]:/foo/', '::12.34.56.78', None), 480 ('http://[::ffff:12.34.56.78]:/foo/', 481 '::ffff:12.34.56.78', None), 482 ] 483 def _encode(t): 484 return t[0].encode('ascii'), t[1].encode('ascii'), t[2] 485 bytes_cases = [_encode(x) for x in str_cases] 486 for url, hostname, port in str_cases + bytes_cases: 487 urlparsed = urllib.parse.urlparse(url) 488 self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port)) 489 490 str_cases = [ 491 'http://::12.34.56.78]/', 492 'http://[::1/foo/', 493 'ftp://[::1/foo/bad]/bad', 494 'http://[::1/foo/bad]/bad', 495 'http://[::ffff:12.34.56.78'] 496 bytes_cases = [x.encode('ascii') for x in str_cases] 497 for invalid_url in str_cases + bytes_cases: 498 self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url) 499 500 def test_urldefrag(self): 501 str_cases = [ 502 ('http://python.org#frag', 'http://python.org', 'frag'), 503 ('http://python.org', 'http://python.org', ''), 504 ('http://python.org/#frag', 'http://python.org/', 'frag'), 505 ('http://python.org/', 'http://python.org/', ''), 506 ('http://python.org/?q#frag', 'http://python.org/?q', 'frag'), 507 ('http://python.org/?q', 'http://python.org/?q', ''), 508 ('http://python.org/p#frag', 'http://python.org/p', 'frag'), 509 ('http://python.org/p?q', 'http://python.org/p?q', ''), 510 (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'), 511 (RFC2396_BASE, 'http://a/b/c/d;p?q', ''), 512 ] 513 def _encode(t): 514 return type(t)(x.encode('ascii') for x in t) 515 bytes_cases = [_encode(x) for x in str_cases] 516 for url, defrag, frag in str_cases + bytes_cases: 517 result = urllib.parse.urldefrag(url) 518 self.assertEqual(result.geturl(), url) 519 self.assertEqual(result, (defrag, frag)) 520 self.assertEqual(result.url, defrag) 521 self.assertEqual(result.fragment, frag) 522 523 def test_urlsplit_attributes(self): 524 url = "HTTP://WWW.PYTHON.ORG/doc/#frag" 525 p = urllib.parse.urlsplit(url) 526 self.assertEqual(p.scheme, "http") 527 self.assertEqual(p.netloc, "WWW.PYTHON.ORG") 528 self.assertEqual(p.path, "/doc/") 529 self.assertEqual(p.query, "") 530 self.assertEqual(p.fragment, "frag") 531 self.assertEqual(p.username, None) 532 self.assertEqual(p.password, None) 533 self.assertEqual(p.hostname, "www.python.org") 534 self.assertEqual(p.port, None) 535 # geturl() won't return exactly the original URL in this case 536 # since the scheme is always case-normalized 537 # We handle this by ignoring the first 4 characters of the URL 538 self.assertEqual(p.geturl()[4:], url[4:]) 539 540 url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag" 541 p = urllib.parse.urlsplit(url) 542 self.assertEqual(p.scheme, "http") 543 self.assertEqual(p.netloc, "User:Pass@www.python.org:080") 544 self.assertEqual(p.path, "/doc/") 545 self.assertEqual(p.query, "query=yes") 546 self.assertEqual(p.fragment, "frag") 547 self.assertEqual(p.username, "User") 548 self.assertEqual(p.password, "Pass") 549 self.assertEqual(p.hostname, "www.python.org") 550 self.assertEqual(p.port, 80) 551 self.assertEqual(p.geturl(), url) 552 553 # Addressing issue1698, which suggests Username can contain 554 # "@" characters. Though not RFC compliant, many ftp sites allow 555 # and request email addresses as usernames. 556 557 url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag" 558 p = urllib.parse.urlsplit(url) 559 self.assertEqual(p.scheme, "http") 560 self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080") 561 self.assertEqual(p.path, "/doc/") 562 self.assertEqual(p.query, "query=yes") 563 self.assertEqual(p.fragment, "frag") 564 self.assertEqual(p.username, "User@example.com") 565 self.assertEqual(p.password, "Pass") 566 self.assertEqual(p.hostname, "www.python.org") 567 self.assertEqual(p.port, 80) 568 self.assertEqual(p.geturl(), url) 569 570 # And check them all again, only with bytes this time 571 url = b"HTTP://WWW.PYTHON.ORG/doc/#frag" 572 p = urllib.parse.urlsplit(url) 573 self.assertEqual(p.scheme, b"http") 574 self.assertEqual(p.netloc, b"WWW.PYTHON.ORG") 575 self.assertEqual(p.path, b"/doc/") 576 self.assertEqual(p.query, b"") 577 self.assertEqual(p.fragment, b"frag") 578 self.assertEqual(p.username, None) 579 self.assertEqual(p.password, None) 580 self.assertEqual(p.hostname, b"www.python.org") 581 self.assertEqual(p.port, None) 582 self.assertEqual(p.geturl()[4:], url[4:]) 583 584 url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag" 585 p = urllib.parse.urlsplit(url) 586 self.assertEqual(p.scheme, b"http") 587 self.assertEqual(p.netloc, b"User:Pass@www.python.org:080") 588 self.assertEqual(p.path, b"/doc/") 589 self.assertEqual(p.query, b"query=yes") 590 self.assertEqual(p.fragment, b"frag") 591 self.assertEqual(p.username, b"User") 592 self.assertEqual(p.password, b"Pass") 593 self.assertEqual(p.hostname, b"www.python.org") 594 self.assertEqual(p.port, 80) 595 self.assertEqual(p.geturl(), url) 596 597 url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag" 598 p = urllib.parse.urlsplit(url) 599 self.assertEqual(p.scheme, b"http") 600 self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080") 601 self.assertEqual(p.path, b"/doc/") 602 self.assertEqual(p.query, b"query=yes") 603 self.assertEqual(p.fragment, b"frag") 604 self.assertEqual(p.username, b"User@example.com") 605 self.assertEqual(p.password, b"Pass") 606 self.assertEqual(p.hostname, b"www.python.org") 607 self.assertEqual(p.port, 80) 608 self.assertEqual(p.geturl(), url) 609 610 # Verify an illegal port raises ValueError 611 url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag" 612 p = urllib.parse.urlsplit(url) 613 with self.assertRaisesRegex(ValueError, "out of range"): 614 p.port 615 616 def test_attributes_bad_port(self): 617 """Check handling of invalid ports.""" 618 for bytes in (False, True): 619 for parse in (urllib.parse.urlsplit, urllib.parse.urlparse): 620 for port in ("foo", "1.5", "-1", "0x10"): 621 with self.subTest(bytes=bytes, parse=parse, port=port): 622 netloc = "www.example.net:" + port 623 url = "http://" + netloc 624 if bytes: 625 netloc = netloc.encode("ascii") 626 url = url.encode("ascii") 627 p = parse(url) 628 self.assertEqual(p.netloc, netloc) 629 with self.assertRaises(ValueError): 630 p.port 631 632 def test_attributes_without_netloc(self): 633 # This example is straight from RFC 3261. It looks like it 634 # should allow the username, hostname, and port to be filled 635 # in, but doesn't. Since it's a URI and doesn't use the 636 # scheme://netloc syntax, the netloc and related attributes 637 # should be left empty. 638 uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15" 639 p = urllib.parse.urlsplit(uri) 640 self.assertEqual(p.netloc, "") 641 self.assertEqual(p.username, None) 642 self.assertEqual(p.password, None) 643 self.assertEqual(p.hostname, None) 644 self.assertEqual(p.port, None) 645 self.assertEqual(p.geturl(), uri) 646 647 p = urllib.parse.urlparse(uri) 648 self.assertEqual(p.netloc, "") 649 self.assertEqual(p.username, None) 650 self.assertEqual(p.password, None) 651 self.assertEqual(p.hostname, None) 652 self.assertEqual(p.port, None) 653 self.assertEqual(p.geturl(), uri) 654 655 # You guessed it, repeating the test with bytes input 656 uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15" 657 p = urllib.parse.urlsplit(uri) 658 self.assertEqual(p.netloc, b"") 659 self.assertEqual(p.username, None) 660 self.assertEqual(p.password, None) 661 self.assertEqual(p.hostname, None) 662 self.assertEqual(p.port, None) 663 self.assertEqual(p.geturl(), uri) 664 665 p = urllib.parse.urlparse(uri) 666 self.assertEqual(p.netloc, b"") 667 self.assertEqual(p.username, None) 668 self.assertEqual(p.password, None) 669 self.assertEqual(p.hostname, None) 670 self.assertEqual(p.port, None) 671 self.assertEqual(p.geturl(), uri) 672 673 def test_noslash(self): 674 # Issue 1637: http://foo.com?query is legal 675 self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"), 676 ('http', 'example.com', '', '', 'blahblah=/foo', '')) 677 self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"), 678 (b'http', b'example.com', b'', b'', b'blahblah=/foo', b'')) 679 680 def test_withoutscheme(self): 681 # Test urlparse without scheme 682 # Issue 754016: urlparse goes wrong with IP:port without scheme 683 # RFC 1808 specifies that netloc should start with //, urlparse expects 684 # the same, otherwise it classifies the portion of url as path. 685 self.assertEqual(urllib.parse.urlparse("path"), 686 ('','','path','','','')) 687 self.assertEqual(urllib.parse.urlparse("//www.python.org:80"), 688 ('','www.python.org:80','','','','')) 689 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"), 690 ('http','www.python.org:80','','','','')) 691 # Repeat for bytes input 692 self.assertEqual(urllib.parse.urlparse(b"path"), 693 (b'',b'',b'path',b'',b'',b'')) 694 self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"), 695 (b'',b'www.python.org:80',b'',b'',b'',b'')) 696 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"), 697 (b'http',b'www.python.org:80',b'',b'',b'',b'')) 698 699 def test_portseparator(self): 700 # Issue 754016 makes changes for port separator ':' from scheme separator 701 self.assertEqual(urllib.parse.urlparse("path:80"), 702 ('','','path:80','','','')) 703 self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','','')) 704 self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','','')) 705 self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"), 706 ('http','www.python.org:80','','','','')) 707 # As usual, need to check bytes input as well 708 self.assertEqual(urllib.parse.urlparse(b"path:80"), 709 (b'',b'',b'path:80',b'',b'',b'')) 710 self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b'')) 711 self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b'')) 712 self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"), 713 (b'http',b'www.python.org:80',b'',b'',b'',b'')) 714 715 def test_usingsys(self): 716 # Issue 3314: sys module is used in the error 717 self.assertRaises(TypeError, urllib.parse.urlencode, "foo") 718 719 def test_anyscheme(self): 720 # Issue 7904: s3://foo.com/stuff has netloc "foo.com". 721 self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"), 722 ('s3', 'foo.com', '/stuff', '', '', '')) 723 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"), 724 ('x-newscheme', 'foo.com', '/stuff', '', '', '')) 725 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"), 726 ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment')) 727 self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"), 728 ('x-newscheme', 'foo.com', '/stuff', '', 'query', '')) 729 730 # And for bytes... 731 self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"), 732 (b's3', b'foo.com', b'/stuff', b'', b'', b'')) 733 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"), 734 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b'')) 735 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"), 736 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment')) 737 self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"), 738 (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'')) 739 740 def test_default_scheme(self): 741 # Exercise the scheme parameter of urlparse() and urlsplit() 742 for func in (urllib.parse.urlparse, urllib.parse.urlsplit): 743 with self.subTest(function=func): 744 result = func("http://example.net/", "ftp") 745 self.assertEqual(result.scheme, "http") 746 result = func(b"http://example.net/", b"ftp") 747 self.assertEqual(result.scheme, b"http") 748 self.assertEqual(func("path", "ftp").scheme, "ftp") 749 self.assertEqual(func("path", scheme="ftp").scheme, "ftp") 750 self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp") 751 self.assertEqual(func("path").scheme, "") 752 self.assertEqual(func(b"path").scheme, b"") 753 self.assertEqual(func(b"path", "").scheme, b"") 754 755 def test_parse_fragments(self): 756 # Exercise the allow_fragments parameter of urlparse() and urlsplit() 757 tests = ( 758 ("http:#frag", "path"), 759 ("//example.net#frag", "path"), 760 ("index.html#frag", "path"), 761 (";a=b#frag", "params"), 762 ("?a=b#frag", "query"), 763 ("#frag", "path"), 764 ) 765 for url, attr in tests: 766 for func in (urllib.parse.urlparse, urllib.parse.urlsplit): 767 if attr == "params" and func is urllib.parse.urlsplit: 768 attr = "path" 769 with self.subTest(url=url, function=func): 770 result = func(url, allow_fragments=False) 771 self.assertEqual(result.fragment, "") 772 self.assertTrue(getattr(result, attr).endswith("#frag")) 773 self.assertEqual(func(url, "", False).fragment, "") 774 775 result = func(url, allow_fragments=True) 776 self.assertEqual(result.fragment, "frag") 777 self.assertFalse(getattr(result, attr).endswith("frag")) 778 self.assertEqual(func(url, "", True).fragment, "frag") 779 self.assertEqual(func(url).fragment, "frag") 780 781 def test_mixed_types_rejected(self): 782 # Several functions that process either strings or ASCII encoded bytes 783 # accept multiple arguments. Check they reject mixed type input 784 with self.assertRaisesRegex(TypeError, "Cannot mix str"): 785 urllib.parse.urlparse("www.python.org", b"http") 786 with self.assertRaisesRegex(TypeError, "Cannot mix str"): 787 urllib.parse.urlparse(b"www.python.org", "http") 788 with self.assertRaisesRegex(TypeError, "Cannot mix str"): 789 urllib.parse.urlsplit("www.python.org", b"http") 790 with self.assertRaisesRegex(TypeError, "Cannot mix str"): 791 urllib.parse.urlsplit(b"www.python.org", "http") 792 with self.assertRaisesRegex(TypeError, "Cannot mix str"): 793 urllib.parse.urlunparse(( b"http", "www.python.org","","","","")) 794 with self.assertRaisesRegex(TypeError, "Cannot mix str"): 795 urllib.parse.urlunparse(("http", b"www.python.org","","","","")) 796 with self.assertRaisesRegex(TypeError, "Cannot mix str"): 797 urllib.parse.urlunsplit((b"http", "www.python.org","","","")) 798 with self.assertRaisesRegex(TypeError, "Cannot mix str"): 799 urllib.parse.urlunsplit(("http", b"www.python.org","","","")) 800 with self.assertRaisesRegex(TypeError, "Cannot mix str"): 801 urllib.parse.urljoin("http://python.org", b"http://python.org") 802 with self.assertRaisesRegex(TypeError, "Cannot mix str"): 803 urllib.parse.urljoin(b"http://python.org", "http://python.org") 804 805 def _check_result_type(self, str_type): 806 num_args = len(str_type._fields) 807 bytes_type = str_type._encoded_counterpart 808 self.assertIs(bytes_type._decoded_counterpart, str_type) 809 str_args = ('',) * num_args 810 bytes_args = (b'',) * num_args 811 str_result = str_type(*str_args) 812 bytes_result = bytes_type(*bytes_args) 813 encoding = 'ascii' 814 errors = 'strict' 815 self.assertEqual(str_result, str_args) 816 self.assertEqual(bytes_result.decode(), str_args) 817 self.assertEqual(bytes_result.decode(), str_result) 818 self.assertEqual(bytes_result.decode(encoding), str_args) 819 self.assertEqual(bytes_result.decode(encoding), str_result) 820 self.assertEqual(bytes_result.decode(encoding, errors), str_args) 821 self.assertEqual(bytes_result.decode(encoding, errors), str_result) 822 self.assertEqual(bytes_result, bytes_args) 823 self.assertEqual(str_result.encode(), bytes_args) 824 self.assertEqual(str_result.encode(), bytes_result) 825 self.assertEqual(str_result.encode(encoding), bytes_args) 826 self.assertEqual(str_result.encode(encoding), bytes_result) 827 self.assertEqual(str_result.encode(encoding, errors), bytes_args) 828 self.assertEqual(str_result.encode(encoding, errors), bytes_result) 829 830 def test_result_pairs(self): 831 # Check encoding and decoding between result pairs 832 result_types = [ 833 urllib.parse.DefragResult, 834 urllib.parse.SplitResult, 835 urllib.parse.ParseResult, 836 ] 837 for result_type in result_types: 838 self._check_result_type(result_type) 839 840 def test_parse_qs_encoding(self): 841 result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1") 842 self.assertEqual(result, {'key': ['\u0141\xE9']}) 843 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="utf-8") 844 self.assertEqual(result, {'key': ['\u0141\xE9']}) 845 result = urllib.parse.parse_qs("key=\u0141%C3%A9", encoding="ascii") 846 self.assertEqual(result, {'key': ['\u0141\ufffd\ufffd']}) 847 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii") 848 self.assertEqual(result, {'key': ['\u0141\ufffd-']}) 849 result = urllib.parse.parse_qs("key=\u0141%E9-", encoding="ascii", 850 errors="ignore") 851 self.assertEqual(result, {'key': ['\u0141-']}) 852 853 def test_parse_qsl_encoding(self): 854 result = urllib.parse.parse_qsl("key=\u0141%E9", encoding="latin-1") 855 self.assertEqual(result, [('key', '\u0141\xE9')]) 856 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="utf-8") 857 self.assertEqual(result, [('key', '\u0141\xE9')]) 858 result = urllib.parse.parse_qsl("key=\u0141%C3%A9", encoding="ascii") 859 self.assertEqual(result, [('key', '\u0141\ufffd\ufffd')]) 860 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii") 861 self.assertEqual(result, [('key', '\u0141\ufffd-')]) 862 result = urllib.parse.parse_qsl("key=\u0141%E9-", encoding="ascii", 863 errors="ignore") 864 self.assertEqual(result, [('key', '\u0141-')]) 865 866 def test_urlencode_sequences(self): 867 # Other tests incidentally urlencode things; test non-covered cases: 868 # Sequence and object values. 869 result = urllib.parse.urlencode({'a': [1, 2], 'b': (3, 4, 5)}, True) 870 # we cannot rely on ordering here 871 assert set(result.split('&')) == {'a=1', 'a=2', 'b=3', 'b=4', 'b=5'} 872 873 class Trivial: 874 def __str__(self): 875 return 'trivial' 876 877 result = urllib.parse.urlencode({'a': Trivial()}, True) 878 self.assertEqual(result, 'a=trivial') 879 880 def test_urlencode_quote_via(self): 881 result = urllib.parse.urlencode({'a': 'some value'}) 882 self.assertEqual(result, "a=some+value") 883 result = urllib.parse.urlencode({'a': 'some value/another'}, 884 quote_via=urllib.parse.quote) 885 self.assertEqual(result, "a=some%20value%2Fanother") 886 result = urllib.parse.urlencode({'a': 'some value/another'}, 887 safe='/', quote_via=urllib.parse.quote) 888 self.assertEqual(result, "a=some%20value/another") 889 890 def test_quote_from_bytes(self): 891 self.assertRaises(TypeError, urllib.parse.quote_from_bytes, 'foo') 892 result = urllib.parse.quote_from_bytes(b'archaeological arcana') 893 self.assertEqual(result, 'archaeological%20arcana') 894 result = urllib.parse.quote_from_bytes(b'') 895 self.assertEqual(result, '') 896 897 def test_unquote_to_bytes(self): 898 result = urllib.parse.unquote_to_bytes('abc%20def') 899 self.assertEqual(result, b'abc def') 900 result = urllib.parse.unquote_to_bytes('') 901 self.assertEqual(result, b'') 902 903 def test_quote_errors(self): 904 self.assertRaises(TypeError, urllib.parse.quote, b'foo', 905 encoding='utf-8') 906 self.assertRaises(TypeError, urllib.parse.quote, b'foo', errors='strict') 907 908 def test_issue14072(self): 909 p1 = urllib.parse.urlsplit('tel:+31-641044153') 910 self.assertEqual(p1.scheme, 'tel') 911 self.assertEqual(p1.path, '+31-641044153') 912 p2 = urllib.parse.urlsplit('tel:+31641044153') 913 self.assertEqual(p2.scheme, 'tel') 914 self.assertEqual(p2.path, '+31641044153') 915 # assert the behavior for urlparse 916 p1 = urllib.parse.urlparse('tel:+31-641044153') 917 self.assertEqual(p1.scheme, 'tel') 918 self.assertEqual(p1.path, '+31-641044153') 919 p2 = urllib.parse.urlparse('tel:+31641044153') 920 self.assertEqual(p2.scheme, 'tel') 921 self.assertEqual(p2.path, '+31641044153') 922 923 def test_telurl_params(self): 924 p1 = urllib.parse.urlparse('tel:123-4;phone-context=+1-650-516') 925 self.assertEqual(p1.scheme, 'tel') 926 self.assertEqual(p1.path, '123-4') 927 self.assertEqual(p1.params, 'phone-context=+1-650-516') 928 929 p1 = urllib.parse.urlparse('tel:+1-201-555-0123') 930 self.assertEqual(p1.scheme, 'tel') 931 self.assertEqual(p1.path, '+1-201-555-0123') 932 self.assertEqual(p1.params, '') 933 934 p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com') 935 self.assertEqual(p1.scheme, 'tel') 936 self.assertEqual(p1.path, '7042') 937 self.assertEqual(p1.params, 'phone-context=example.com') 938 939 p1 = urllib.parse.urlparse('tel:863-1234;phone-context=+1-914-555') 940 self.assertEqual(p1.scheme, 'tel') 941 self.assertEqual(p1.path, '863-1234') 942 self.assertEqual(p1.params, 'phone-context=+1-914-555') 943 944 def test_Quoter_repr(self): 945 quoter = urllib.parse.Quoter(urllib.parse._ALWAYS_SAFE) 946 self.assertIn('Quoter', repr(quoter)) 947 948 def test_all(self): 949 expected = [] 950 undocumented = { 951 'splitattr', 'splithost', 'splitnport', 'splitpasswd', 952 'splitport', 'splitquery', 'splittag', 'splittype', 'splituser', 953 'splitvalue', 954 'Quoter', 'ResultBase', 'clear_cache', 'to_bytes', 'unwrap', 955 } 956 for name in dir(urllib.parse): 957 if name.startswith('_') or name in undocumented: 958 continue 959 object = getattr(urllib.parse, name) 960 if getattr(object, '__module__', None) == 'urllib.parse': 961 expected.append(name) 962 self.assertCountEqual(urllib.parse.__all__, expected) 963 964 965class Utility_Tests(unittest.TestCase): 966 """Testcase to test the various utility functions in the urllib.""" 967 # In Python 2 this test class was in test_urllib. 968 969 def test_splittype(self): 970 splittype = urllib.parse.splittype 971 self.assertEqual(splittype('type:opaquestring'), ('type', 'opaquestring')) 972 self.assertEqual(splittype('opaquestring'), (None, 'opaquestring')) 973 self.assertEqual(splittype(':opaquestring'), (None, ':opaquestring')) 974 self.assertEqual(splittype('type:'), ('type', '')) 975 self.assertEqual(splittype('type:opaque:string'), ('type', 'opaque:string')) 976 977 def test_splithost(self): 978 splithost = urllib.parse.splithost 979 self.assertEqual(splithost('//www.example.org:80/foo/bar/baz.html'), 980 ('www.example.org:80', '/foo/bar/baz.html')) 981 self.assertEqual(splithost('//www.example.org:80'), 982 ('www.example.org:80', '')) 983 self.assertEqual(splithost('/foo/bar/baz.html'), 984 (None, '/foo/bar/baz.html')) 985 986 def test_splituser(self): 987 splituser = urllib.parse.splituser 988 self.assertEqual(splituser('User:Pass@www.python.org:080'), 989 ('User:Pass', 'www.python.org:080')) 990 self.assertEqual(splituser('@www.python.org:080'), 991 ('', 'www.python.org:080')) 992 self.assertEqual(splituser('www.python.org:080'), 993 (None, 'www.python.org:080')) 994 self.assertEqual(splituser('User:Pass@'), 995 ('User:Pass', '')) 996 self.assertEqual(splituser('User@example.com:Pass@www.python.org:080'), 997 ('User@example.com:Pass', 'www.python.org:080')) 998 999 def test_splitpasswd(self): 1000 # Some of the password examples are not sensible, but it is added to 1001 # confirming to RFC2617 and addressing issue4675. 1002 splitpasswd = urllib.parse.splitpasswd 1003 self.assertEqual(splitpasswd('user:ab'), ('user', 'ab')) 1004 self.assertEqual(splitpasswd('user:a\nb'), ('user', 'a\nb')) 1005 self.assertEqual(splitpasswd('user:a\tb'), ('user', 'a\tb')) 1006 self.assertEqual(splitpasswd('user:a\rb'), ('user', 'a\rb')) 1007 self.assertEqual(splitpasswd('user:a\fb'), ('user', 'a\fb')) 1008 self.assertEqual(splitpasswd('user:a\vb'), ('user', 'a\vb')) 1009 self.assertEqual(splitpasswd('user:a:b'), ('user', 'a:b')) 1010 self.assertEqual(splitpasswd('user:a b'), ('user', 'a b')) 1011 self.assertEqual(splitpasswd('user 2:ab'), ('user 2', 'ab')) 1012 self.assertEqual(splitpasswd('user+1:a+b'), ('user+1', 'a+b')) 1013 self.assertEqual(splitpasswd('user:'), ('user', '')) 1014 self.assertEqual(splitpasswd('user'), ('user', None)) 1015 self.assertEqual(splitpasswd(':ab'), ('', 'ab')) 1016 1017 def test_splitport(self): 1018 splitport = urllib.parse.splitport 1019 self.assertEqual(splitport('parrot:88'), ('parrot', '88')) 1020 self.assertEqual(splitport('parrot'), ('parrot', None)) 1021 self.assertEqual(splitport('parrot:'), ('parrot', None)) 1022 self.assertEqual(splitport('127.0.0.1'), ('127.0.0.1', None)) 1023 self.assertEqual(splitport('parrot:cheese'), ('parrot:cheese', None)) 1024 self.assertEqual(splitport('[::1]:88'), ('[::1]', '88')) 1025 self.assertEqual(splitport('[::1]'), ('[::1]', None)) 1026 self.assertEqual(splitport(':88'), ('', '88')) 1027 1028 def test_splitnport(self): 1029 splitnport = urllib.parse.splitnport 1030 self.assertEqual(splitnport('parrot:88'), ('parrot', 88)) 1031 self.assertEqual(splitnport('parrot'), ('parrot', -1)) 1032 self.assertEqual(splitnport('parrot', 55), ('parrot', 55)) 1033 self.assertEqual(splitnport('parrot:'), ('parrot', -1)) 1034 self.assertEqual(splitnport('parrot:', 55), ('parrot', 55)) 1035 self.assertEqual(splitnport('127.0.0.1'), ('127.0.0.1', -1)) 1036 self.assertEqual(splitnport('127.0.0.1', 55), ('127.0.0.1', 55)) 1037 self.assertEqual(splitnport('parrot:cheese'), ('parrot', None)) 1038 self.assertEqual(splitnport('parrot:cheese', 55), ('parrot', None)) 1039 1040 def test_splitquery(self): 1041 # Normal cases are exercised by other tests; ensure that we also 1042 # catch cases with no port specified (testcase ensuring coverage) 1043 splitquery = urllib.parse.splitquery 1044 self.assertEqual(splitquery('http://python.org/fake?foo=bar'), 1045 ('http://python.org/fake', 'foo=bar')) 1046 self.assertEqual(splitquery('http://python.org/fake?foo=bar?'), 1047 ('http://python.org/fake?foo=bar', '')) 1048 self.assertEqual(splitquery('http://python.org/fake'), 1049 ('http://python.org/fake', None)) 1050 self.assertEqual(splitquery('?foo=bar'), ('', 'foo=bar')) 1051 1052 def test_splittag(self): 1053 splittag = urllib.parse.splittag 1054 self.assertEqual(splittag('http://example.com?foo=bar#baz'), 1055 ('http://example.com?foo=bar', 'baz')) 1056 self.assertEqual(splittag('http://example.com?foo=bar#'), 1057 ('http://example.com?foo=bar', '')) 1058 self.assertEqual(splittag('#baz'), ('', 'baz')) 1059 self.assertEqual(splittag('http://example.com?foo=bar'), 1060 ('http://example.com?foo=bar', None)) 1061 self.assertEqual(splittag('http://example.com?foo=bar#baz#boo'), 1062 ('http://example.com?foo=bar#baz', 'boo')) 1063 1064 def test_splitattr(self): 1065 splitattr = urllib.parse.splitattr 1066 self.assertEqual(splitattr('/path;attr1=value1;attr2=value2'), 1067 ('/path', ['attr1=value1', 'attr2=value2'])) 1068 self.assertEqual(splitattr('/path;'), ('/path', [''])) 1069 self.assertEqual(splitattr(';attr1=value1;attr2=value2'), 1070 ('', ['attr1=value1', 'attr2=value2'])) 1071 self.assertEqual(splitattr('/path'), ('/path', [])) 1072 1073 def test_splitvalue(self): 1074 # Normal cases are exercised by other tests; test pathological cases 1075 # with no key/value pairs. (testcase ensuring coverage) 1076 splitvalue = urllib.parse.splitvalue 1077 self.assertEqual(splitvalue('foo=bar'), ('foo', 'bar')) 1078 self.assertEqual(splitvalue('foo='), ('foo', '')) 1079 self.assertEqual(splitvalue('=bar'), ('', 'bar')) 1080 self.assertEqual(splitvalue('foobar'), ('foobar', None)) 1081 self.assertEqual(splitvalue('foo=bar=baz'), ('foo', 'bar=baz')) 1082 1083 def test_to_bytes(self): 1084 result = urllib.parse.to_bytes('http://www.python.org') 1085 self.assertEqual(result, 'http://www.python.org') 1086 self.assertRaises(UnicodeError, urllib.parse.to_bytes, 1087 'http://www.python.org/medi\u00e6val') 1088 1089 def test_unwrap(self): 1090 url = urllib.parse.unwrap('<URL:type://host/path>') 1091 self.assertEqual(url, 'type://host/path') 1092 1093 1094if __name__ == "__main__": 1095 unittest.main() 1096