1"""Regression tests for what was in Python 2's "urllib" module"""
2
3import urllib.parse
4import urllib.request
5import urllib.error
6import http.client
7import email.message
8import io
9import unittest
10from unittest.mock import patch
11from test import support
12import os
13try:
14    import ssl
15except ImportError:
16    ssl = None
17import sys
18import tempfile
19from nturl2path import url2pathname, pathname2url
20
21from base64 import b64encode
22import collections
23
24
def hexescape(char):
    """Escape char as RFC 2396 specifies.

    Returns '%' followed by the uppercase hexadecimal value of the
    character's code point, zero-padded to at least two digits
    (e.g. 'a' -> '%61', '\\x00' -> '%00').
    """
    # "%02X" pads one-digit values with a leading zero and leaves longer
    # values untouched, matching the old hex()[2:].upper() + manual pad.
    return "%%%02X" % ord(char)
31
# Shortcut for testing FancyURLopener: module-level cache holding a single
# shared opener instance, created lazily by the urlopen() helper below.
_urlopener = None
34
35
def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object"""
    global _urlopener
    if proxies is not None:
        # An explicit proxy mapping always gets its own fresh opener.
        opener = urllib.request.FancyURLopener(proxies=proxies)
    elif _urlopener:
        # Reuse the cached shared opener.
        opener = _urlopener
    else:
        # First call: create the shared opener and cache it.
        _urlopener = opener = FancyURLopener()
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
50
51
def FancyURLopener():
    """Instantiate FancyURLopener while suppressing its DeprecationWarning."""
    expected_warning = (
        'FancyURLopener style of invoking requests is deprecated.',
        DeprecationWarning,
    )
    with support.check_warnings(expected_warning):
        return urllib.request.FancyURLopener()
57
58
def fakehttp(fakedata):
    """Return an HTTPConnection subclass whose "socket" replays *fakedata*.

    The returned class can be substituted for http.client.HTTPConnection so
    that urlopen() tests run without touching the network.  Bytes sent by the
    client are captured on the class attribute ``buf`` for later inspection.
    """
    class FakeSocket(io.BytesIO):
        # Reference count of handles onto this buffer: the "socket" itself
        # plus every makefile() wrapper.  The BytesIO is only really closed
        # once all of them have been close()d.
        io_refs = 1

        def sendall(self, data):
            # Capture the outgoing request bytes for verification.
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            self.io_refs += 1
            return self

        def read(self, amt=None):
            # Mimic a real socket: reads after close() yield b"".
            if self.closed:
                return b""
            return io.BytesIO.read(self, amt)

        def readline(self, length=None):
            if self.closed:
                return b""
            return io.BytesIO.readline(self, length)

        def close(self):
            # Decrement the handle count; close for real on the last one.
            self.io_refs -= 1
            if self.io_refs == 0:
                io.BytesIO.close(self)

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None

        def connect(self):
            # Instead of opening a real connection, hand the canned
            # response bytes to a fake socket and expose it on the class.
            self.sock = FakeSocket(self.fakedata)
            type(self).fakesock = self.sock
    FakeHTTPConnection.fakedata = fakedata

    return FakeHTTPConnection
96
97
class FakeHTTPMixin(object):
    """Mixin that swaps http.client.HTTPConnection for a canned fake."""

    def fakehttp(self, fakedata):
        # Remember the real connection class and install the fake in a
        # single swap; the fake replays *fakedata* as the HTTP response.
        self._connection_class, http.client.HTTPConnection = (
            http.client.HTTPConnection, fakehttp(fakedata))

    def unfakehttp(self):
        # Put the real connection class back.
        http.client.HTTPConnection = self._connection_class
105
106
class FakeFTPMixin(object):
    """Mixin that swaps urllib.request.ftpwrapper for a do-nothing stub."""

    def fakeftp(self):
        class _StubFtpWrapper(object):
            # Accept the real ftpwrapper constructor signature but
            # perform no I/O at all.
            def __init__(self, user, passwd, host, port, dirs,
                         timeout=None, persistent=True):
                pass

            def retrfile(self, file, type):
                # Pretend the transfer succeeded: empty body, size 0.
                return io.BytesIO(), 0

            def close(self):
                pass

        self._ftpwrapper_class = urllib.request.ftpwrapper
        urllib.request.ftpwrapper = _StubFtpWrapper

    def unfakeftp(self):
        # Restore the real ftpwrapper class.
        urllib.request.ftpwrapper = self._ftpwrapper_class
125
126
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        f = open(support.TESTFN, 'wb')
        try:
            f.write(self.text)
        finally:
            f.close()
        self.pathname = support.TESTFN
        # Re-open the file through a file: URL; every test below exercises
        # this response object.
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()
        os.remove(support.TESTFN)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                         "object returned by urlopen() lacks %s attribute" %
                         attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        # A second readline() past EOF must return an empty bytes object.
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        # Reading directly from the descriptor must yield the same bytes.
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        # Local file responses carry no HTTP status code.
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        # A bare relative path is not a valid URL for urlopen().
        self.assertRaises(ValueError,urllib.request.urlopen,'./' + self.pathname)
208
class ProxyTests(unittest.TestCase):
    """Tests for proxy discovery and bypass driven by environment variables."""

    def setUp(self):
        # Records changes to env vars
        self.env = support.EnvironmentVarGuard()
        # Delete all proxy related env vars
        for k in list(os.environ):
            if 'proxy' in k.lower():
                self.env.unset(k)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.request.getproxies_environment()
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        self.assertEqual('localhost', proxies['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com:1234')
        self.assertTrue(urllib.request.proxy_bypass_environment('anotherdomain.com'))
        self.assertTrue(urllib.request.proxy_bypass_environment('anotherdomain.com:8888'))
        self.assertTrue(urllib.request.proxy_bypass_environment('newdomain.com:1234'))

    def test_proxy_cgi_ignore(self):
        # HTTP_PROXY is honoured normally, but once REQUEST_METHOD is set
        # (i.e. we appear to be running as CGI) it must be ignored.
        try:
            self.env.set('HTTP_PROXY', 'http://somewhere:3128')
            proxies = urllib.request.getproxies_environment()
            self.assertEqual('http://somewhere:3128', proxies['http'])
            self.env.set('REQUEST_METHOD', 'GET')
            proxies = urllib.request.getproxies_environment()
            self.assertNotIn('http', proxies)
        finally:
            self.env.unset('REQUEST_METHOD')
            self.env.unset('HTTP_PROXY')

    def test_proxy_bypass_environment_host_match(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234, .d.o.t')
        self.assertTrue(bypass('localhost'))
        self.assertTrue(bypass('LocalHost'))                 # MixedCase
        self.assertTrue(bypass('LOCALHOST'))                 # UPPERCASE
        self.assertTrue(bypass('newdomain.com:1234'))
        self.assertTrue(bypass('foo.d.o.t'))                 # issue 29142
        self.assertTrue(bypass('anotherdomain.com:8888'))
        self.assertTrue(bypass('www.newdomain.com:1234'))
        self.assertFalse(bypass('prelocalhost'))
        self.assertFalse(bypass('newdomain.com'))            # no port
        self.assertFalse(bypass('newdomain.com:1235'))       # wrong port
261
class ProxyTests_withOrderedEnv(unittest.TestCase):
    """Proxy tests that depend on the *order* of environment variables."""

    def setUp(self):
        # We need to test conditions, where variable order _is_ significant
        self._saved_env = os.environ
        # Monkey patch os.environ, start with empty fake environment
        os.environ = collections.OrderedDict()

    def tearDown(self):
        # Put the real environment mapping back.
        os.environ = self._saved_env

    def test_getproxies_environment_prefer_lowercase(self):
        # Test lowercase preference with removal
        os.environ['no_proxy'] = ''
        os.environ['No_Proxy'] = 'localhost'
        self.assertFalse(urllib.request.proxy_bypass_environment('localhost'))
        self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary'))
        os.environ['http_proxy'] = ''
        os.environ['HTTP_PROXY'] = 'http://somewhere:3128'
        proxies = urllib.request.getproxies_environment()
        self.assertEqual({}, proxies)
        # Test lowercase preference of proxy bypass and correct matching including ports
        os.environ['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
        os.environ['No_Proxy'] = 'xyz.com'
        self.assertTrue(urllib.request.proxy_bypass_environment('localhost'))
        self.assertTrue(urllib.request.proxy_bypass_environment('noproxy.com:5678'))
        self.assertTrue(urllib.request.proxy_bypass_environment('my.proxy:1234'))
        self.assertFalse(urllib.request.proxy_bypass_environment('my.proxy'))
        self.assertFalse(urllib.request.proxy_bypass_environment('arbitrary'))
        # Test lowercase preference with replacement
        os.environ['http_proxy'] = 'http://somewhere:3128'
        os.environ['Http_Proxy'] = 'http://somewhereelse:3128'
        proxies = urllib.request.getproxies_environment()
        self.assertEqual('http://somewhere:3128', proxies['http'])
296
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection."""

    def check_read(self, ver):
        # Serve a canned response for the given HTTP version and verify
        # that body, URL and status code all come back through urlopen().
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            fp = urllib.request.urlopen(url)
            self.assertEqual(fp.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            resp = urlopen("http://www.python.org")
            self.assertTrue(resp.fp.will_close)
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            self.assertRaises(OSError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            # A redirect from http to a file: URL must be refused.
            msg = "Redirection to url 'file:"
            with self.assertRaisesRegex(urllib.error.HTTPError, msg):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        for i in range(FancyURLopener().maxtries):
            self.fakehttp(b'''HTTP/1.1 302 Found
Location: file://guidocomputer.athome.com:/python/license
Connection: close
''')
            try:
                self.assertRaises(urllib.error.HTTPError, urlopen,
                    "http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(OSError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        self.assertTrue(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_file_notexists(self):
        fd, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        try:
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(fd)
            os.unlink(tmp_file)
        # After removal, opening the same URL must fail.
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen(test_ftp_url)
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as e:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        self.assertFalse(e.exception.filename)
        self.assertTrue(e.exception.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        # With MAXFTPCACHE forced to 0, opening an ftp URL must prune the
        # pre-seeded cache entry instead of letting the cache grow.
        self.fakeftp()
        try:
            urllib.request.ftpcache['test'] = urllib.request.ftpwrapper('user', 'pass', 'localhost', 21, [])
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()


    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            fp = urlopen("http://user:pass@python.org/")
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://{}@python.org/".format(userpass)
            fakehttp_wrapper = http.client.HTTPConnection
            authorization = ("Authorization: Basic %s\r\n" %
                             b64encode(userpass.encode("ASCII")).decode("ASCII"))
            fp = urlopen(url)
            # The authorization header must be in place
            self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8"))
            self.assertEqual(fp.readline(), b"Hello!")
            self.assertEqual(fp.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(fp.geturl(), url)
            self.assertEqual(fp.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        # Instantiating URLopener directly must emit a DeprecationWarning.
        with support.check_warnings(('',DeprecationWarning)):
            urllib.request.URLopener()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_cafile_and_context(self):
        # Passing both cafile and context is contradictory and must raise.
        context = ssl.create_default_context()
        with support.check_warnings(('', DeprecationWarning)):
            with self.assertRaises(ValueError):
                urllib.request.urlopen(
                    "https://localhost", cafile="/nonexistent/path", context=context
                )
478
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def setUp(self):
        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        # Percent-encoded form of self.text.
        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        # Same text, base64-encoded with a different charset.
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        self.text_url_resp = urllib.request.urlopen(self.text_url)
        self.text_url_base64_resp = urllib.request.urlopen(
            self.text_url_base64)
        self.image_url_resp = urllib.request.urlopen(self.image_url)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                         "object returned by urlopen() lacks %s attribute" %
                         attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
            [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
            str(len(self.image)))
        # A bare "data:," URL defaults to text/plain in US-ASCII.
        self.assertEqual(urllib.request.urlopen("data:,").info().get_params(),
            [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
            self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        # Decode using the charset declared in the URL itself.
        self.assertEqual(self.text_url_resp.read().decode(
            dict(self.text_url_resp.info().get_params())['charset']), self.text)

    def test_read_text_base64(self):
        self.assertEqual(self.text_url_base64_resp.read().decode(
            dict(self.text_url_base64_resp.info().get_params())['charset']),
            self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        # A data URL without the mandatory comma separator is invalid.
        self.assertRaises(ValueError,urllib.request.urlopen,'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError,urllib.request.urlopen,'data:;base64,Cg=')
551
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        # A context manager guarantees the file is closed even if write()
        # fails; the previous try/finally with a bare "except: pass" could
        # mask a failed open() behind a NameError on the unbound handle.
        with open(support.TESTFN, 'wb') as f:
            f.write(self.text)

    def tearDown(self):
        # Delete the temporary files; ignore files that were never created
        # or that a test already removed (os.remove only raises OSError).
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for *filePath*, skipping the running test
        if the path cannot be encoded as UTF-8."""
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf-8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        # Register for cleanup first so the file is removed in tearDown
        # even if the write below fails.
        self.registerFileForCleanUp(newFilePath)
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        # Queue *fileName* for deletion in tearDown().
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get an email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                  "made")
        with open(second_temp, 'rb') as f:
            text = f.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        def hooktester(block_count, block_read_size, file_size, count_holder=[0]):
            self.assertIsInstance(block_count, int)
            self.assertIsInstance(block_read_size, int)
            self.assertIsInstance(file_size, int)
            # Block numbers must arrive in order, starting at 0.
            self.assertEqual(block_count, count_holder[0])
            count_holder[0] = count_holder[0] + 1
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []
        def hooktester(block_count, block_read_size, file_size, _report=report):
            _report.append((block_count, block_read_size, file_size))
        srcFileName = self.createNewTempFile()
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
            support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read).
        report = []
        def hooktester(block_count, block_read_size, file_size, _report=report):
            _report.append((block_count, block_read_size, file_size))
        srcFileName = self.createNewTempFile(b"x" * 5)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
            support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][2], 5)
        self.assertEqual(report[1][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []
        def hooktester(block_count, block_read_size, file_size, _report=report):
            _report.append((block_count, block_read_size, file_size))
        srcFileName = self.createNewTempFile(b"x" * 8193)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
            support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][2], 8193)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[1][1], 8192)
        self.assertEqual(report[2][1], 8192)
690
691
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    def test_short_content_raises_ContentTooShortError(self):
        # The canned response advertises Content-Length: 100 but delivers
        # far fewer bytes, so urlretrieve() must raise ContentTooShortError.
        self.fakehttp(b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')

        def _reporthook(par1, par2, par3):
            pass

        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve('http://example.com/',
                                           reporthook=_reporthook)
            finally:
                self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        # Same truncated response; the error must also be raised when no
        # reporthook is supplied.
        self.fakehttp(b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')
        with self.assertRaises(urllib.error.ContentTooShortError):
            try:
                urllib.request.urlretrieve('http://example.com/')
            finally:
                self.unfakehttp()
731
732
733class QuotingTests(unittest.TestCase):
734    r"""Tests for urllib.quote() and urllib.quote_plus()
735
736    According to RFC 2396 (Uniform Resource Identifiers), to escape a
737    character you write it as '%' + <2 character US-ASCII hex value>.
738    The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
739    character properly. Case does not matter on the hex letters.
740
741    The various character sets specified are:
742
743    Reserved characters : ";/?:@&=+$,"
744        Have special meaning in URIs and must be escaped if not being used for
745        their special meaning
746    Data characters : letters, digits, and "-_.!~*'()"
747        Unreserved and do not need to be escaped; can be, though, if desired
748    Control characters : 0x00 - 0x1F, 0x7F
749        Have no use in URIs so must be escaped
750    space : 0x20
751        Must be escaped
752    Delimiters : '<>#%"'
753        Must be escaped
754    Unwise : "{}|\^[]`"
755        Must be escaped
756
757    """
758
759    def test_never_quote(self):
760        # Make sure quote() does not quote letters, digits, and "_,.-"
761        do_not_quote = '' .join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
762                                 "abcdefghijklmnopqrstuvwxyz",
763                                 "0123456789",
764                                 "_.-"])
765        result = urllib.parse.quote(do_not_quote)
766        self.assertEqual(do_not_quote, result,
767                         "using quote(): %r != %r" % (do_not_quote, result))
768        result = urllib.parse.quote_plus(do_not_quote)
769        self.assertEqual(do_not_quote, result,
770                        "using quote_plus(): %r != %r" % (do_not_quote, result))
771
772    def test_default_safe(self):
773        # Test '/' is default value for 'safe' parameter
774        self.assertEqual(urllib.parse.quote.__defaults__[0], '/')
775
776    def test_safe(self):
777        # Test setting 'safe' parameter does what it should do
778        quote_by_default = "<>"
779        result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
780        self.assertEqual(quote_by_default, result,
781                         "using quote(): %r != %r" % (quote_by_default, result))
782        result = urllib.parse.quote_plus(quote_by_default,
783                                         safe=quote_by_default)
784        self.assertEqual(quote_by_default, result,
785                         "using quote_plus(): %r != %r" %
786                         (quote_by_default, result))
787        # Safe expressed as bytes rather than str
788        result = urllib.parse.quote(quote_by_default, safe=b"<>")
789        self.assertEqual(quote_by_default, result,
790                         "using quote(): %r != %r" % (quote_by_default, result))
791        # "Safe" non-ASCII characters should have no effect
792        # (Since URIs are not allowed to have non-ASCII characters)
793        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
794        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
795        self.assertEqual(expect, result,
796                         "using quote(): %r != %r" %
797                         (expect, result))
798        # Same as above, but using a bytes rather than str
799        result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
800        expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
801        self.assertEqual(expect, result,
802                         "using quote(): %r != %r" %
803                         (expect, result))
804
805    def test_default_quoting(self):
806        # Make sure all characters that should be quoted are by default sans
807        # space (separate test for that).
808        should_quote = [chr(num) for num in range(32)] # For 0x00 - 0x1F
809        should_quote.append(r'<>#%"{}|\^[]`')
810        should_quote.append(chr(127)) # For 0x7F
811        should_quote = ''.join(should_quote)
812        for char in should_quote:
813            result = urllib.parse.quote(char)
814            self.assertEqual(hexescape(char), result,
815                             "using quote(): "
816                             "%s should be escaped to %s, not %s" %
817                             (char, hexescape(char), result))
818            result = urllib.parse.quote_plus(char)
819            self.assertEqual(hexescape(char), result,
820                             "using quote_plus(): "
821                             "%s should be escapes to %s, not %s" %
822                             (char, hexescape(char), result))
823        del should_quote
824        partial_quote = "ab[]cd"
825        expected = "ab%5B%5Dcd"
826        result = urllib.parse.quote(partial_quote)
827        self.assertEqual(expected, result,
828                         "using quote(): %r != %r" % (expected, result))
829        result = urllib.parse.quote_plus(partial_quote)
830        self.assertEqual(expected, result,
831                         "using quote_plus(): %r != %r" % (expected, result))
832
833    def test_quoting_space(self):
834        # Make sure quote() and quote_plus() handle spaces as specified in
835        # their unique way
836        result = urllib.parse.quote(' ')
837        self.assertEqual(result, hexescape(' '),
838                         "using quote(): %r != %r" % (result, hexescape(' ')))
839        result = urllib.parse.quote_plus(' ')
840        self.assertEqual(result, '+',
841                         "using quote_plus(): %r != +" % result)
842        given = "a b cd e f"
843        expect = given.replace(' ', hexescape(' '))
844        result = urllib.parse.quote(given)
845        self.assertEqual(expect, result,
846                         "using quote(): %r != %r" % (expect, result))
847        expect = given.replace(' ', '+')
848        result = urllib.parse.quote_plus(given)
849        self.assertEqual(expect, result,
850                         "using quote_plus(): %r != %r" % (expect, result))
851
852    def test_quoting_plus(self):
853        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
854                         'alpha%2Bbeta+gamma')
855        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
856                         'alpha+beta+gamma')
857        # Test with bytes
858        self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
859                         'alpha%2Bbeta+gamma')
860        # Test with safe bytes
861        self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
862                         'alpha+beta+gamma')
863
864    def test_quote_bytes(self):
865        # Bytes should quote directly to percent-encoded values
866        given = b"\xa2\xd8ab\xff"
867        expect = "%A2%D8ab%FF"
868        result = urllib.parse.quote(given)
869        self.assertEqual(expect, result,
870                         "using quote(): %r != %r" % (expect, result))
871        # Encoding argument should raise type error on bytes input
872        self.assertRaises(TypeError, urllib.parse.quote, given,
873                            encoding="latin-1")
874        # quote_from_bytes should work the same
875        result = urllib.parse.quote_from_bytes(given)
876        self.assertEqual(expect, result,
877                         "using quote_from_bytes(): %r != %r"
878                         % (expect, result))
879
880    def test_quote_with_unicode(self):
881        # Characters in Latin-1 range, encoded by default in UTF-8
882        given = "\xa2\xd8ab\xff"
883        expect = "%C2%A2%C3%98ab%C3%BF"
884        result = urllib.parse.quote(given)
885        self.assertEqual(expect, result,
886                         "using quote(): %r != %r" % (expect, result))
887        # Characters in Latin-1 range, encoded by with None (default)
888        result = urllib.parse.quote(given, encoding=None, errors=None)
889        self.assertEqual(expect, result,
890                         "using quote(): %r != %r" % (expect, result))
891        # Characters in Latin-1 range, encoded with Latin-1
892        given = "\xa2\xd8ab\xff"
893        expect = "%A2%D8ab%FF"
894        result = urllib.parse.quote(given, encoding="latin-1")
895        self.assertEqual(expect, result,
896                         "using quote(): %r != %r" % (expect, result))
897        # Characters in BMP, encoded by default in UTF-8
898        given = "\u6f22\u5b57"              # "Kanji"
899        expect = "%E6%BC%A2%E5%AD%97"
900        result = urllib.parse.quote(given)
901        self.assertEqual(expect, result,
902                         "using quote(): %r != %r" % (expect, result))
903        # Characters in BMP, encoded with Latin-1
904        given = "\u6f22\u5b57"
905        self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
906                                    encoding="latin-1")
907        # Characters in BMP, encoded with Latin-1, with replace error handling
908        given = "\u6f22\u5b57"
909        expect = "%3F%3F"                   # "??"
910        result = urllib.parse.quote(given, encoding="latin-1",
911                                    errors="replace")
912        self.assertEqual(expect, result,
913                         "using quote(): %r != %r" % (expect, result))
914        # Characters in BMP, Latin-1, with xmlcharref error handling
915        given = "\u6f22\u5b57"
916        expect = "%26%2328450%3B%26%2323383%3B"     # "&#28450;&#23383;"
917        result = urllib.parse.quote(given, encoding="latin-1",
918                                    errors="xmlcharrefreplace")
919        self.assertEqual(expect, result,
920                         "using quote(): %r != %r" % (expect, result))
921
922    def test_quote_plus_with_unicode(self):
923        # Encoding (latin-1) test for quote_plus
924        given = "\xa2\xd8 \xff"
925        expect = "%A2%D8+%FF"
926        result = urllib.parse.quote_plus(given, encoding="latin-1")
927        self.assertEqual(expect, result,
928                         "using quote_plus(): %r != %r" % (expect, result))
929        # Errors test for quote_plus
930        given = "ab\u6f22\u5b57 cd"
931        expect = "ab%3F%3F+cd"
932        result = urllib.parse.quote_plus(given, encoding="latin-1",
933                                         errors="replace")
934        self.assertEqual(expect, result,
935                         "using quote_plus(): %r != %r" % (expect, result))
936
937
938class UnquotingTests(unittest.TestCase):
939    """Tests for unquote() and unquote_plus()
940
941    See the doc string for quoting_Tests for details on quoting and such.
942
943    """
944
945    def test_unquoting(self):
946        # Make sure unquoting of all ASCII values works
947        escape_list = []
948        for num in range(128):
949            given = hexescape(chr(num))
950            expect = chr(num)
951            result = urllib.parse.unquote(given)
952            self.assertEqual(expect, result,
953                             "using unquote(): %r != %r" % (expect, result))
954            result = urllib.parse.unquote_plus(given)
955            self.assertEqual(expect, result,
956                             "using unquote_plus(): %r != %r" %
957                             (expect, result))
958            escape_list.append(given)
959        escape_string = ''.join(escape_list)
960        del escape_list
961        result = urllib.parse.unquote(escape_string)
962        self.assertEqual(result.count('%'), 1,
963                         "using unquote(): not all characters escaped: "
964                         "%s" % result)
965        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
966        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
967        with support.check_warnings(('', BytesWarning), quiet=True):
968            self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')
969
970    def test_unquoting_badpercent(self):
971        # Test unquoting on bad percent-escapes
972        given = '%xab'
973        expect = given
974        result = urllib.parse.unquote(given)
975        self.assertEqual(expect, result, "using unquote(): %r != %r"
976                         % (expect, result))
977        given = '%x'
978        expect = given
979        result = urllib.parse.unquote(given)
980        self.assertEqual(expect, result, "using unquote(): %r != %r"
981                         % (expect, result))
982        given = '%'
983        expect = given
984        result = urllib.parse.unquote(given)
985        self.assertEqual(expect, result, "using unquote(): %r != %r"
986                         % (expect, result))
987        # unquote_to_bytes
988        given = '%xab'
989        expect = bytes(given, 'ascii')
990        result = urllib.parse.unquote_to_bytes(given)
991        self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
992                         % (expect, result))
993        given = '%x'
994        expect = bytes(given, 'ascii')
995        result = urllib.parse.unquote_to_bytes(given)
996        self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
997                         % (expect, result))
998        given = '%'
999        expect = bytes(given, 'ascii')
1000        result = urllib.parse.unquote_to_bytes(given)
1001        self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
1002                         % (expect, result))
1003        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
1004        self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
1005
1006    def test_unquoting_mixed_case(self):
1007        # Test unquoting on mixed-case hex digits in the percent-escapes
1008        given = '%Ab%eA'
1009        expect = b'\xab\xea'
1010        result = urllib.parse.unquote_to_bytes(given)
1011        self.assertEqual(expect, result,
1012                         "using unquote_to_bytes(): %r != %r"
1013                         % (expect, result))
1014
1015    def test_unquoting_parts(self):
1016        # Make sure unquoting works when have non-quoted characters
1017        # interspersed
1018        given = 'ab%sd' % hexescape('c')
1019        expect = "abcd"
1020        result = urllib.parse.unquote(given)
1021        self.assertEqual(expect, result,
1022                         "using quote(): %r != %r" % (expect, result))
1023        result = urllib.parse.unquote_plus(given)
1024        self.assertEqual(expect, result,
1025                         "using unquote_plus(): %r != %r" % (expect, result))
1026
1027    def test_unquoting_plus(self):
1028        # Test difference between unquote() and unquote_plus()
1029        given = "are+there+spaces..."
1030        expect = given
1031        result = urllib.parse.unquote(given)
1032        self.assertEqual(expect, result,
1033                         "using unquote(): %r != %r" % (expect, result))
1034        expect = given.replace('+', ' ')
1035        result = urllib.parse.unquote_plus(given)
1036        self.assertEqual(expect, result,
1037                         "using unquote_plus(): %r != %r" % (expect, result))
1038
1039    def test_unquote_to_bytes(self):
1040        given = 'br%C3%BCckner_sapporo_20050930.doc'
1041        expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
1042        result = urllib.parse.unquote_to_bytes(given)
1043        self.assertEqual(expect, result,
1044                         "using unquote_to_bytes(): %r != %r"
1045                         % (expect, result))
1046        # Test on a string with unescaped non-ASCII characters
1047        # (Technically an invalid URI; expect those characters to be UTF-8
1048        # encoded).
1049        result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
1050        expect = b'\xe6\xbc\xa2\xc3\xbc'    # UTF-8 for "\u6f22\u00fc"
1051        self.assertEqual(expect, result,
1052                         "using unquote_to_bytes(): %r != %r"
1053                         % (expect, result))
1054        # Test with a bytes as input
1055        given = b'%A2%D8ab%FF'
1056        expect = b'\xa2\xd8ab\xff'
1057        result = urllib.parse.unquote_to_bytes(given)
1058        self.assertEqual(expect, result,
1059                         "using unquote_to_bytes(): %r != %r"
1060                         % (expect, result))
1061        # Test with a bytes as input, with unescaped non-ASCII bytes
1062        # (Technically an invalid URI; expect those bytes to be preserved)
1063        given = b'%A2\xd8ab%FF'
1064        expect = b'\xa2\xd8ab\xff'
1065        result = urllib.parse.unquote_to_bytes(given)
1066        self.assertEqual(expect, result,
1067                         "using unquote_to_bytes(): %r != %r"
1068                         % (expect, result))
1069
1070    def test_unquote_with_unicode(self):
1071        # Characters in the Latin-1 range, encoded with UTF-8
1072        given = 'br%C3%BCckner_sapporo_20050930.doc'
1073        expect = 'br\u00fcckner_sapporo_20050930.doc'
1074        result = urllib.parse.unquote(given)
1075        self.assertEqual(expect, result,
1076                         "using unquote(): %r != %r" % (expect, result))
1077        # Characters in the Latin-1 range, encoded with None (default)
1078        result = urllib.parse.unquote(given, encoding=None, errors=None)
1079        self.assertEqual(expect, result,
1080                         "using unquote(): %r != %r" % (expect, result))
1081
1082        # Characters in the Latin-1 range, encoded with Latin-1
1083        result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
1084                                      encoding="latin-1")
1085        expect = 'br\u00fcckner_sapporo_20050930.doc'
1086        self.assertEqual(expect, result,
1087                         "using unquote(): %r != %r" % (expect, result))
1088
1089        # Characters in BMP, encoded with UTF-8
1090        given = "%E6%BC%A2%E5%AD%97"
1091        expect = "\u6f22\u5b57"             # "Kanji"
1092        result = urllib.parse.unquote(given)
1093        self.assertEqual(expect, result,
1094                         "using unquote(): %r != %r" % (expect, result))
1095
1096        # Decode with UTF-8, invalid sequence
1097        given = "%F3%B1"
1098        expect = "\ufffd"                   # Replacement character
1099        result = urllib.parse.unquote(given)
1100        self.assertEqual(expect, result,
1101                         "using unquote(): %r != %r" % (expect, result))
1102
1103        # Decode with UTF-8, invalid sequence, replace errors
1104        result = urllib.parse.unquote(given, errors="replace")
1105        self.assertEqual(expect, result,
1106                         "using unquote(): %r != %r" % (expect, result))
1107
1108        # Decode with UTF-8, invalid sequence, ignoring errors
1109        given = "%F3%B1"
1110        expect = ""
1111        result = urllib.parse.unquote(given, errors="ignore")
1112        self.assertEqual(expect, result,
1113                         "using unquote(): %r != %r" % (expect, result))
1114
1115        # A mix of non-ASCII and percent-encoded characters, UTF-8
1116        result = urllib.parse.unquote("\u6f22%C3%BC")
1117        expect = '\u6f22\u00fc'
1118        self.assertEqual(expect, result,
1119                         "using unquote(): %r != %r" % (expect, result))
1120
1121        # A mix of non-ASCII and percent-encoded characters, Latin-1
1122        # (Note, the string contains non-Latin-1-representable characters)
1123        result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
1124        expect = '\u6f22\u00fc'
1125        self.assertEqual(expect, result,
1126                         "using unquote(): %r != %r" % (expect, result))
1127
1128class urlencode_Tests(unittest.TestCase):
1129    """Tests for urlencode()"""
1130
1131    def help_inputtype(self, given, test_type):
1132        """Helper method for testing different input types.
1133
1134        'given' must lead to only the pairs:
1135            * 1st, 1
1136            * 2nd, 2
1137            * 3rd, 3
1138
1139        Test cannot assume anything about order.  Docs make no guarantee and
1140        have possible dictionary input.
1141
1142        """
1143        expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
1144        result = urllib.parse.urlencode(given)
1145        for expected in expect_somewhere:
1146            self.assertIn(expected, result,
1147                         "testing %s: %s not found in %s" %
1148                         (test_type, expected, result))
1149        self.assertEqual(result.count('&'), 2,
1150                         "testing %s: expected 2 '&'s; got %s" %
1151                         (test_type, result.count('&')))
1152        amp_location = result.index('&')
1153        on_amp_left = result[amp_location - 1]
1154        on_amp_right = result[amp_location + 1]
1155        self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
1156                     "testing %s: '&' not located in proper place in %s" %
1157                     (test_type, result))
1158        self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
1159                         "testing %s: "
1160                         "unexpected number of characters: %s != %s" %
1161                         (test_type, len(result), (5 * 3) + 2))
1162
1163    def test_using_mapping(self):
1164        # Test passing in a mapping object as an argument.
1165        self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
1166                            "using dict as input type")
1167
1168    def test_using_sequence(self):
1169        # Test passing in a sequence of two-item sequences as an argument.
1170        self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
1171                            "using sequence of two-item tuples as input")
1172
1173    def test_quoting(self):
1174        # Make sure keys and values are quoted using quote_plus()
1175        given = {"&":"="}
1176        expect = "%s=%s" % (hexescape('&'), hexescape('='))
1177        result = urllib.parse.urlencode(given)
1178        self.assertEqual(expect, result)
1179        given = {"key name":"A bunch of pluses"}
1180        expect = "key+name=A+bunch+of+pluses"
1181        result = urllib.parse.urlencode(given)
1182        self.assertEqual(expect, result)
1183
1184    def test_doseq(self):
1185        # Test that passing True for 'doseq' parameter works correctly
1186        given = {'sequence':['1', '2', '3']}
1187        expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
1188        result = urllib.parse.urlencode(given)
1189        self.assertEqual(expect, result)
1190        result = urllib.parse.urlencode(given, True)
1191        for value in given["sequence"]:
1192            expect = "sequence=%s" % value
1193            self.assertIn(expect, result)
1194        self.assertEqual(result.count('&'), 2,
1195                         "Expected 2 '&'s, got %s" % result.count('&'))
1196
1197    def test_empty_sequence(self):
1198        self.assertEqual("", urllib.parse.urlencode({}))
1199        self.assertEqual("", urllib.parse.urlencode([]))
1200
1201    def test_nonstring_values(self):
1202        self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
1203        self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))
1204
1205    def test_nonstring_seq_values(self):
1206        self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
1207        self.assertEqual("a=None&a=a",
1208                         urllib.parse.urlencode({"a": [None, "a"]}, True))
1209        data = collections.OrderedDict([("a", 1), ("b", 1)])
1210        self.assertEqual("a=a&a=b",
1211                         urllib.parse.urlencode({"a": data}, True))
1212
1213    def test_urlencode_encoding(self):
1214        # ASCII encoding. Expect %3F with errors="replace'
1215        given = (('\u00a0', '\u00c1'),)
1216        expect = '%3F=%3F'
1217        result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
1218        self.assertEqual(expect, result)
1219
1220        # Default is UTF-8 encoding.
1221        given = (('\u00a0', '\u00c1'),)
1222        expect = '%C2%A0=%C3%81'
1223        result = urllib.parse.urlencode(given)
1224        self.assertEqual(expect, result)
1225
1226        # Latin-1 encoding.
1227        given = (('\u00a0', '\u00c1'),)
1228        expect = '%A0=%C1'
1229        result = urllib.parse.urlencode(given, encoding="latin-1")
1230        self.assertEqual(expect, result)
1231
1232    def test_urlencode_encoding_doseq(self):
1233        # ASCII Encoding. Expect %3F with errors="replace'
1234        given = (('\u00a0', '\u00c1'),)
1235        expect = '%3F=%3F'
1236        result = urllib.parse.urlencode(given, doseq=True,
1237                                        encoding="ASCII", errors="replace")
1238        self.assertEqual(expect, result)
1239
1240        # ASCII Encoding. On a sequence of values.
1241        given = (("\u00a0", (1, "\u00c1")),)
1242        expect = '%3F=1&%3F=%3F'
1243        result = urllib.parse.urlencode(given, True,
1244                                        encoding="ASCII", errors="replace")
1245        self.assertEqual(expect, result)
1246
1247        # Utf-8
1248        given = (("\u00a0", "\u00c1"),)
1249        expect = '%C2%A0=%C3%81'
1250        result = urllib.parse.urlencode(given, True)
1251        self.assertEqual(expect, result)
1252
1253        given = (("\u00a0", (42, "\u00c1")),)
1254        expect = '%C2%A0=42&%C2%A0=%C3%81'
1255        result = urllib.parse.urlencode(given, True)
1256        self.assertEqual(expect, result)
1257
1258        # latin-1
1259        given = (("\u00a0", "\u00c1"),)
1260        expect = '%A0=%C1'
1261        result = urllib.parse.urlencode(given, True, encoding="latin-1")
1262        self.assertEqual(expect, result)
1263
1264        given = (("\u00a0", (42, "\u00c1")),)
1265        expect = '%A0=42&%A0=%C1'
1266        result = urllib.parse.urlencode(given, True, encoding="latin-1")
1267        self.assertEqual(expect, result)
1268
1269    def test_urlencode_bytes(self):
1270        given = ((b'\xa0\x24', b'\xc1\x24'),)
1271        expect = '%A0%24=%C1%24'
1272        result = urllib.parse.urlencode(given)
1273        self.assertEqual(expect, result)
1274        result = urllib.parse.urlencode(given, True)
1275        self.assertEqual(expect, result)
1276
1277        # Sequence of values
1278        given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
1279        expect = '%A0%24=42&%A0%24=%C1%24'
1280        result = urllib.parse.urlencode(given, True)
1281        self.assertEqual(expect, result)
1282
1283    def test_urlencode_encoding_safe_parameter(self):
1284
1285        # Send '$' (\x24) as safe character
1286        # Default utf-8 encoding
1287
1288        given = ((b'\xa0\x24', b'\xc1\x24'),)
1289        result = urllib.parse.urlencode(given, safe=":$")
1290        expect = '%A0$=%C1$'
1291        self.assertEqual(expect, result)
1292
1293        given = ((b'\xa0\x24', b'\xc1\x24'),)
1294        result = urllib.parse.urlencode(given, doseq=True, safe=":$")
1295        expect = '%A0$=%C1$'
1296        self.assertEqual(expect, result)
1297
1298        # Safe parameter in sequence
1299        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1300        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1301        result = urllib.parse.urlencode(given, True, safe=":$")
1302        self.assertEqual(expect, result)
1303
1304        # Test all above in latin-1 encoding
1305
1306        given = ((b'\xa0\x24', b'\xc1\x24'),)
1307        result = urllib.parse.urlencode(given, safe=":$",
1308                                        encoding="latin-1")
1309        expect = '%A0$=%C1$'
1310        self.assertEqual(expect, result)
1311
1312        given = ((b'\xa0\x24', b'\xc1\x24'),)
1313        expect = '%A0$=%C1$'
1314        result = urllib.parse.urlencode(given, doseq=True, safe=":$",
1315                                        encoding="latin-1")
1316
1317        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1318        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1319        result = urllib.parse.urlencode(given, True, safe=":$",
1320                                        encoding="latin-1")
1321        self.assertEqual(expect, result)
1322
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (result, expected_url))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (result, expected_path))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # '+' must NOT be treated as an encoded space by url2pathname().
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the urllib.url2path function.')
    def test_ntpath(self):
        # On Windows, drive specs in several URL spellings ('/C:/', '///C:/',
        # and the legacy '/C|/' form) must all map to the same local path.
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.request.url2pathname(url)
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1382
class Utility_Tests(unittest.TestCase):
    """Tests for miscellaneous utility functions in urllib."""

    def test_thishost(self):
        """Check that urllib.request.thishost() produces a tuple."""
        hosts = urllib.request.thishost()
        self.assertIsInstance(hosts, tuple)
1389
1390
class URLopener_Tests(unittest.TestCase):
    """Testcase to test the open method of URLopener class."""

    def test_quoted_open(self):
        # A subclass whose scheme handler simply echoes the URL back lets us
        # observe exactly what quoting open() applied.
        class DummyURLopener(urllib.request.URLopener):
            def open_spam(self, url):
                return url

        deprecation = ('DummyURLopener style of invoking requests is deprecated.',
                       DeprecationWarning)
        with support.check_warnings(deprecation):
            opened = DummyURLopener().open('spam://example/ /')
            self.assertEqual(opened, '//example/%20/')

            # test the safe characters are not quoted by urlopen
            opened = DummyURLopener().open(
                "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
            self.assertEqual(opened,
                             "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
1408
# The tests below are just commented out.
# It is not clear why they keep failing on Windows and SPARC.
# Everywhere else they work fine, but on those machines they sometimes
# fail in one test, sometimes in another. On Linux the tests pass.
# If anybody has one of the problematic environments, please help!
# .   Facundo
1416#
1417# def server(evt):
1418#     import socket, time
1419#     serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1420#     serv.settimeout(3)
1421#     serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1422#     serv.bind(("", 9093))
1423#     serv.listen()
1424#     try:
1425#         conn, addr = serv.accept()
1426#         conn.send("1 Hola mundo\n")
1427#         cantdata = 0
1428#         while cantdata < 13:
1429#             data = conn.recv(13-cantdata)
1430#             cantdata += len(data)
1431#             time.sleep(.3)
1432#         conn.send("2 No more lines\n")
1433#         conn.close()
1434#     except socket.timeout:
1435#         pass
1436#     finally:
1437#         serv.close()
1438#         evt.set()
1439#
1440# class FTPWrapperTests(unittest.TestCase):
1441#
1442#     def setUp(self):
1443#         import ftplib, time, threading
1444#         ftplib.FTP.port = 9093
1445#         self.evt = threading.Event()
1446#         threading.Thread(target=server, args=(self.evt,)).start()
1447#         time.sleep(.1)
1448#
1449#     def tearDown(self):
1450#         self.evt.wait()
1451#
1452#     def testBasic(self):
1453#         # connects
1454#         ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1455#         ftp.close()
1456#
1457#     def testTimeoutNone(self):
1458#         # global default timeout is ignored
1459#         import socket
1460#         self.assertIsNone(socket.getdefaulttimeout())
1461#         socket.setdefaulttimeout(30)
1462#         try:
1463#             ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1464#         finally:
1465#             socket.setdefaulttimeout(None)
1466#         self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1467#         ftp.close()
1468#
1469#     def testTimeoutDefault(self):
1470#         # global default timeout is used
1471#         import socket
1472#         self.assertIsNone(socket.getdefaulttimeout())
1473#         socket.setdefaulttimeout(30)
1474#         try:
1475#             ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1476#         finally:
1477#             socket.setdefaulttimeout(None)
1478#         self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1479#         ftp.close()
1480#
1481#     def testTimeoutValue(self):
1482#         ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1483#                                 timeout=30)
1484#         self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1485#         ftp.close()
1486
1487
class RequestTests(unittest.TestCase):
    """Unit tests for urllib.request.Request."""

    def test_default_values(self):
        Request = urllib.request.Request
        # Without a data payload the method defaults to GET...
        no_data = Request("http://www.python.org")
        self.assertEqual(no_data.get_method(), 'GET')
        # ...while supplying data (even an empty mapping) implies POST.
        with_data = Request("http://www.python.org", {})
        self.assertEqual(with_data.get_method(), 'POST')

    def test_with_method_arg(self):
        Request = urllib.request.Request
        # An explicit method= argument overrides the data-based default,
        # both with and without a data payload.
        for args in (("http://www.python.org",),
                     ("http://www.python.org", {})):
            request = Request(*args, method='HEAD')
            self.assertEqual(request.method, 'HEAD')
            self.assertEqual(request.get_method(), 'HEAD')
        # Assigning .method after construction takes precedence over
        # the method chosen at construction time.
        request = Request("http://www.python.org", method='GET')
        self.assertEqual(request.get_method(), 'GET')
        request.method = 'HEAD'
        self.assertEqual(request.get_method(), 'HEAD')
1510
1511
class URL2PathNameTests(unittest.TestCase):
    """Tests for nturl2path.url2pathname()."""

    def test_converting_drive_letter(self):
        # Both '|' and ':' spell a drive letter in a file URL.
        for url, expected in (("///C|", 'C:'),
                              ("///C:", 'C:'),
                              ("///C|/", 'C:\\')):
            self.assertEqual(url2pathname(url), expected)

    def test_converting_when_no_drive_letter(self):
        # Leading slashes with no drive letter map to leading backslashes.
        self.assertEqual(url2pathname("///C/test/"), '\\\\\\C\\test\\')
        self.assertEqual(url2pathname("////C/test/"), '\\\\C\\test\\')

    def test_simple_compare(self):
        self.assertEqual(url2pathname("///C|/foo/bar/spam.foo"),
                         'C:\\foo\\bar\\spam.foo')

    def test_non_ascii_drive_letter(self):
        # A non-ASCII "drive letter" is rejected with IOError.
        with self.assertRaises(IOError):
            url2pathname("///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        # Converting path -> url -> path must reproduce the input.
        for path in ('C:',
                     '\\\\\\C\\test\\\\',
                     'C:\\foo\\bar\\spam.foo'):
            self.assertEqual(url2pathname(pathname2url(path)), path)
1538
class PathName2URLTests(unittest.TestCase):
    """Tests for nturl2path.pathname2url()."""

    def test_converting_drive_letter(self):
        # A trailing backslash after the drive is dropped.
        self.assertEqual(pathname2url("C:"), '///C:')
        self.assertEqual(pathname2url("C:\\"), '///C:')

    def test_converting_when_no_drive_letter(self):
        # Each leading backslash maps to a leading slash in the URL.
        cases = [('\\\\\\folder\\test\\', '/////folder/test/'),
                 ('\\\\folder\\test\\', '////folder/test/'),
                 ('\\folder\\test\\', '/folder/test/')]
        for path, expected in cases:
            self.assertEqual(pathname2url(path), expected)

    def test_simple_compare(self):
        self.assertEqual(pathname2url('C:\\foo\\bar\\spam.foo'),
                         "///C:/foo/bar/spam.foo")

    def test_long_drive_letter(self):
        # More than one character before the colon is not a drive letter.
        with self.assertRaises(IOError):
            pathname2url("XX:\\")

    def test_roundtrip_pathname2url(self):
        # Converting url -> path -> url must reproduce the input.
        for url in ('///C:',
                    '/////folder/test/',
                    '///C:/foo/bar/spam.foo'):
            self.assertEqual(pathname2url(url2pathname(url)), url)
1566
# Allow running this test module directly; unittest.main() discovers and
# runs every TestCase defined above.
if __name__ == '__main__':
    unittest.main()
1569