10c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi#!/usr/bin/env python
20c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
30c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yiimport unittest
40c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yifrom test import test_support
50c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
60c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yiimport socket
70c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yiimport urllib
80c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yiimport sys
90c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yiimport os
100c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yiimport time
110c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
# mimetools is loaded through test_support.import_module with
# deprecated=True instead of a plain import statement; presumably this keeps
# the module's deprecation warning out of the test run -- confirm against
# test_support.
mimetools = test_support.import_module("mimetools", deprecated=True)
130c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
140c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
def _open_with_retry(func, host, *args, **kwargs):
    """Call ``func(host, *args, **kwargs)``, retrying when it raises IOError.

    Connecting to remote hosts is flaky, so the call is attempted up to three
    times.  The result of the first successful attempt is returned at once.

    Raises IOError (the exception from the final attempt) if all three
    attempts fail.  Any other exception raised by ``func`` propagates
    immediately, without a retry.
    """
    last_exc = None
    for _ in range(3):
        try:
            return func(host, *args, **kwargs)
        except IOError as exc:
            # Keep a separate reference: the name bound by ``as`` does not
            # survive the handler on Python 3, and it must be re-raised
            # after the loop.
            last_exc = exc
    raise last_exc
260c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
270c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
class URLTimeoutTest(unittest.TestCase):
    """Read a page over the network while a default socket timeout is set."""

    # Default socket timeout (seconds) installed for the duration of each test.
    TIMEOUT = 10.0

    def setUp(self):
        # Remember whatever default was active so tearDown can put it back
        # rather than unconditionally clearing it for later tests.
        self._saved_timeout = socket.getdefaulttimeout()
        socket.setdefaulttimeout(self.TIMEOUT)

    def tearDown(self):
        socket.setdefaulttimeout(self._saved_timeout)

    def testURLread(self):
        # Only checks that open + read complete without raising.
        f = _open_with_retry(urllib.urlopen, "http://www.python.org/")
        try:
            f.read()
        finally:
            # Release the connection even when read() fails or times out.
            f.close()
410c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
class urlopenNetworkTests(unittest.TestCase):
    """Tests urllib.urlopen using the network.

    These tests are not exhaustive.  Assuming that testing using files does a
    good job overall of some of the basic interface features.  There are no
    tests exercising the optional 'data' and 'proxies' arguments.  No tests
    for transparent redirection have been written.

    setUp is not used for always constructing a connection to
    http://www.python.org/ since there are a few tests that don't use that
    address and making a connection is expensive enough to warrant minimizing
    unneeded connections.

    """

    def urlopen(self, *args):
        # Go through the retry helper so a transient connection failure
        # does not fail the test outright.
        return _open_with_retry(urllib.urlopen, *args)

    def test_basic(self):
        # Simple test expected to pass.
        open_url = self.urlopen("http://www.python.org/")
        for attr in ("read", "readline", "readlines", "fileno", "close",
                     "info", "geturl"):
            self.assertTrue(hasattr(open_url, attr), "object returned from "
                            "urlopen lacks the %s attribute" % attr)
        try:
            self.assertTrue(open_url.read(), "calling 'read' failed")
        finally:
            open_url.close()

    def test_readlines(self):
        # Test both readline and readlines.
        open_url = self.urlopen("http://www.python.org/")
        try:
            self.assertIsInstance(open_url.readline(), basestring,
                                  "readline did not return a string")
            self.assertIsInstance(open_url.readlines(), list,
                                  "readlines did not return a list")
        finally:
            open_url.close()

    def test_info(self):
        # Test 'info'.
        open_url = self.urlopen("http://www.python.org/")
        try:
            info_obj = open_url.info()
        finally:
            open_url.close()
        # The assertions live outside the 'finally' clause: if info() raised,
        # info_obj would be unbound and asserting on it there would replace
        # the real error with a NameError.
        self.assertIsInstance(info_obj, mimetools.Message,
                              "object returned by 'info' is not an "
                              "instance of mimetools.Message")
        self.assertEqual(info_obj.getsubtype(), "html")

    def test_geturl(self):
        # Make sure same URL as opened is returned by geturl.
        URL = "http://www.python.org/"
        open_url = self.urlopen(URL)
        try:
            gotten_url = open_url.geturl()
        finally:
            open_url.close()
        self.assertEqual(gotten_url, URL)

    def test_getcode(self):
        # test getcode() with the fancy opener to get 404 error codes
        URL = "http://www.python.org/XXXinvalidXXX"
        open_url = urllib.FancyURLopener().open(URL)
        try:
            code = open_url.getcode()
        finally:
            open_url.close()
        self.assertEqual(code, 404)

    def test_fileno(self):
        if sys.platform == 'win32' or not hasattr(os, 'fdopen'):
            # On Windows, socket handles are not file descriptors; this
            # test can't pass on Windows.  Report it as skipped rather than
            # silently counting it as a pass.
            self.skipTest("fileno() does not yield a usable file "
                          "descriptor on this platform")
        # Make sure fd returned by fileno is valid.
        open_url = self.urlopen("http://www.python.org/")
        fd = open_url.fileno()
        FILE = os.fdopen(fd)
        try:
            self.assertTrue(FILE.read(), "reading from file created using fd "
                                      "returned by fileno failed")
        finally:
            FILE.close()

    def test_bad_address(self):
        # Make sure proper exception is raised when connecting to a bogus
        # address.
        #
        # SF patch 809915:  In Sep 2003, VeriSign started hijacking invalid
        # .com and .net addresses to boost traffic to their own site, and
        # this test started failing when it used such a name.  One hopes the
        # .invalid-style domain below will be spared to serve its purpose.
        bogus_domain = "sadflkjsasf.i.nvali.d"
        try:
            socket.gethostbyname(bogus_domain)
        except socket.gaierror:
            pass
        else:
            # This happens with some overzealous DNS providers such as OpenDNS
            self.skipTest("%r should not resolve for test to work" % bogus_domain)
        # Build the URL from the name that was just probed so the two
        # cannot drift apart.
        self.assertRaises(IOError,
                          urllib.urlopen, "http://%s/" % bogus_domain)
1510c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
class urlretrieveNetworkTests(unittest.TestCase):
    """Tests urllib.urlretrieve using the network."""

    def urlretrieve(self, *args):
        # Go through the retry helper so a transient connection failure
        # does not fail the test outright.
        return _open_with_retry(urllib.urlretrieve, *args)

    def test_basic(self):
        # Test basic functionality.
        file_location, info = self.urlretrieve("http://www.python.org/")
        self.assertTrue(os.path.exists(file_location), "file location returned by"
                        " urlretrieve is not a valid path")
        try:
            with open(file_location) as retrieved:
                self.assertTrue(retrieved.read(), "reading from the file location returned"
                                " by urlretrieve failed")
        finally:
            # Remove the download even if opening or reading it failed.
            os.unlink(file_location)

    def test_specified_path(self):
        # Make sure that specifying the location of the file to write to works.
        file_location, info = self.urlretrieve("http://www.python.org/",
                                               test_support.TESTFN)
        self.assertEqual(file_location, test_support.TESTFN)
        self.assertTrue(os.path.exists(file_location))
        try:
            with open(file_location) as retrieved:
                self.assertTrue(retrieved.read(), "reading from temporary file failed")
        finally:
            # Remove the download even if opening or reading it failed.
            os.unlink(file_location)

    def test_header(self):
        # Make sure header returned as 2nd value from urlretrieve is good.
        file_location, header = self.urlretrieve("http://www.python.org/")
        os.unlink(file_location)
        self.assertIsInstance(header, mimetools.Message,
                              "header is not an instance of mimetools.Message")

    def test_data_header(self):
        # Make sure the Date header of a retrieved resource parses in the
        # expected HTTP date layout.
        logo = "http://www.python.org/community/logos/python-logo-master-v3-TM.png"
        file_location, fileheaders = self.urlretrieve(logo)
        os.unlink(file_location)
        datevalue = fileheaders.getheader('Date')
        dateformat = '%a, %d %b %Y %H:%M:%S GMT'
        try:
            time.strptime(datevalue, dateformat)
        except ValueError:
            # fail() accepts a single message, so interpolate the format
            # here instead of passing it as a second argument.
            self.fail('Date value not in %r format' % dateformat)
2010c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
2020c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
2030c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
def test_main():
    """Run every test case in this module under the 'network' resource.

    test_support.requires('network') is called first.  The three test
    classes are then run inside test_support.check_py3k_warnings, given
    the urllib.urlopen removal message as a DeprecationWarning filter.
    """
    test_support.requires('network')
    urlopen_removed = ("urllib.urlopen.. has been removed", DeprecationWarning)
    test_cases = (
        URLTimeoutTest,
        urlopenNetworkTests,
        urlretrieveNetworkTests,
    )
    with test_support.check_py3k_warnings(urlopen_removed):
        test_support.run_unittest(*test_cases)
2110c5958b1636c47ed7c284f859c8e805fd06a0e6Bill Yi
# Run the network test suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()
214