import StringIO
import robotparser
import unittest
from test import test_support

44710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylmclass RobotTestCase(unittest.TestCase):
54710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __init__(self, index, parser, url, good, agent):
64710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        unittest.TestCase.__init__(self)
74710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if good:
84710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.str = "RobotTest(%d, good, %s)" % (index, url)
94710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else:
104710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.str = "RobotTest(%d, bad, %s)" % (index, url)
114710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.parser = parser
124710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.url = url
134710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.good = good
144710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        self.agent = agent
154710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
164710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def runTest(self):
174710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if isinstance(self.url, tuple):
184710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            agent, url = self.url
194710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else:
204710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            url = self.url
214710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            agent = self.agent
224710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        if self.good:
234710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertTrue(self.parser.can_fetch(agent, url))
244710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        else:
254710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm            self.assertFalse(self.parser.can_fetch(agent, url))
264710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
274710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm    def __str__(self):
284710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm        return self.str
294710c53dcad1ebf3755f3efb9e80ac24bd72a9b2darylm
# Suite collecting every RobotTestCase generated by the RobotTest() calls
# in this module.
tests = unittest.TestSuite()


def RobotTest(index, robots_txt, good_urls, bad_urls,
              agent="test_robotparser"):
    """Parse ``robots_txt`` once and queue one test case per URL.

    Arguments:
        index:      sample number, used in the generated tests' names.
        robots_txt: contents of a robots.txt file, as a single string.
        good_urls:  entries that the parser must allow.
        bad_urls:   entries that the parser must refuse.
        agent:      user-agent name used for entries that are plain URL
                    strings; an ``(agent, url)`` tuple entry overrides it.

    The generated cases are added to the module-level ``tests`` suite and
    all share the same parser instance.
    """
    lines = StringIO.StringIO(robots_txt).readlines()
    parser = robotparser.RobotFileParser()
    parser.parse(lines)
    for url in good_urls:
        tests.addTest(RobotTestCase(index, parser, url, True, agent))
    for url in bad_urls:
        tests.addTest(RobotTestCase(index, parser, url, False, agent))

# Examples from http://www.robotstxt.org/wc/norobots.html (fetched 2002)

# 1.  One record for every agent, with trailing comments on the rules.
doc = """
User-agent: *
Disallow: /cyberworld/map/ # This is an infinite virtual URL space
Disallow: /tmp/ # these will soon disappear
Disallow: /foo.html
"""

good = ['/', '/test.html']
bad = ['/cyberworld/map/index.html', '/tmp/xxx', '/foo.html']

RobotTest(1, doc, good, bad)

# 2.  'cybermapper' has its own record with an empty Disallow, so it is
#     expected to fetch what every other agent is refused.
doc = """
# robots.txt for http://www.example.com/

User-agent: *
Disallow: /cyberworld/map/ # This is an infinite virtual URL space

# Cybermapper knows where to go.
User-agent: cybermapper
Disallow:

"""

good = ['/', '/test.html', ('cybermapper', '/cyberworld/map/index.html')]
bad = ['/cyberworld/map/index.html']

RobotTest(2, doc, good, bad)

# 3.  Everything is off limits.
doc = """
# go away
User-agent: *
Disallow: /
"""

good = []
bad = ['/cyberworld/map/index.html', '/', '/tmp/']

RobotTest(3, doc, good, bad)

# Examples from http://www.robotstxt.org/wc/norobots-rfc.html (fetched 2002)

# 4. and 5.  The same file is checked twice: once with the exact agent
#            name and once with a longer agent string containing it.
doc = """
User-agent: figtree
Disallow: /tmp
Disallow: /a%3cd.html
Disallow: /a%2fb.html
Disallow: /%7ejoe/index.html
"""

good = []  # XFAIL '/a/b.html'
bad = ['/tmp', '/tmp.html', '/tmp/a.html',
       '/a%3cd.html', '/a%3Cd.html', '/a%2fb.html',
       '/~joe/index.html'
       ]

RobotTest(4, doc, good, bad, 'figtree')
RobotTest(5, doc, good, bad, 'FigTree Robot libwww-perl/5.04')

# 6.
doc = """
User-agent: *
Disallow: /tmp/
Disallow: /a%3Cd.html
Disallow: /a/b.html
Disallow: /%7ejoe/index.html
"""

good = ['/tmp']  # XFAIL: '/a%2fb.html'
bad = ['/tmp/', '/tmp/a.html',
       '/a%3cd.html', '/a%3Cd.html', "/a/b.html",
       '/%7Ejoe/index.html']

RobotTest(6, doc, good, bad)

# From bug report #523041

# 7.
doc = """
User-Agent: *
Disallow: /.
"""

good = ['/foo.html']
bad = []  # Bug report says "/" should be denied, but that is not in the RFC

RobotTest(7, doc, good, bad)

# From Google: http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=40364

# 8.
doc = """
User-agent: Googlebot
Allow: /folder1/myfile.html
Disallow: /folder1/
"""

good = ['/folder1/myfile.html']
bad = ['/folder1/anotherfile.html']

RobotTest(8, doc, good, bad, agent="Googlebot")

# 9.  This file is incorrect because "Googlebot" is a substring of
#     "Googlebot-Mobile", so test 10 works just like test 9.
doc = """
User-agent: Googlebot
Disallow: /

User-agent: Googlebot-Mobile
Allow: /
"""

good = []
bad = ['/something.jpg']

RobotTest(9, doc, good, bad, agent="Googlebot")

good = []
bad = ['/something.jpg']

RobotTest(10, doc, good, bad, agent="Googlebot-Mobile")

# 11.  Get the order correct.
doc = """
User-agent: Googlebot-Mobile
Allow: /

User-agent: Googlebot
Disallow: /
"""

good = []
bad = ['/something.jpg']

RobotTest(11, doc, good, bad, agent="Googlebot")

good = ['/something.jpg']
bad = []

RobotTest(12, doc, good, bad, agent="Googlebot-Mobile")


# 13.  Google also got the order wrong in #8.  You need to specify the
#      URLs from more specific to more general.  (Same file as #8, but
#      queried with the lower-case agent name "googlebot".)
doc = """
User-agent: Googlebot
Allow: /folder1/myfile.html
Disallow: /folder1/
"""

good = ['/folder1/myfile.html']
bad = ['/folder1/anotherfile.html']

RobotTest(13, doc, good, bad, agent="googlebot")


# 14. For issue #6325 (query string support)
doc = """
User-agent: *
Disallow: /some/path?name=value
"""

good = ['/some/path']
bad = ['/some/path?name=value']

RobotTest(14, doc, good, bad)

# 15. For issue #4108 (obey first * entry)
doc = """
User-agent: *
Disallow: /some/path

User-agent: *
Disallow: /another/path
"""

good = ['/another/path']
bad = ['/some/path']

RobotTest(15, doc, good, bad)


class NetworkTestCase(unittest.TestCase):
    """Checks against live web sites; each test needs the 'network' resource."""

    def testPasswordProtectedSite(self):
        """Nothing may be fetched from the site queried here.

        NOTE(review): going by the test name, the host is expected to
        demand authentication; it is a third-party site, so confirm it
        still answers that way.
        """
        test_support.requires('network')
        with test_support.transient_internet('mueblesmoraleda.com'):
            url = 'http://mueblesmoraleda.com'
            parser = robotparser.RobotFileParser()
            parser.set_url(url)
            try:
                parser.read()
            except IOError:
                # Host unreachable: nothing can be concluded, so skip.
                self.skipTest('%s is unavailable' % url)
            self.assertEqual(parser.can_fetch("*", url+"/robots.txt"), False)

    def testPythonOrg(self):
        """python.org's robots.txt must allow fetching robots.txt itself."""
        test_support.requires('network')
        with test_support.transient_internet('www.python.org'):
            robots_url = "http://www.python.org/robots.txt"
            parser = robotparser.RobotFileParser(robots_url)
            parser.read()
            self.assertTrue(parser.can_fetch("*", robots_url))


def test_main():
    """Run the generated robots.txt cases and the network tests."""
    # One run_unittest() call for both: it raises when a test fails, so
    # with two consecutive calls a failure among the generated cases would
    # keep NetworkTestCase from running at all.
    test_support.run_unittest(tests, NetworkTestCase)

if __name__ == '__main__':
    # Run verbosely when the module is executed directly as a script.
    test_support.verbose = 1
    test_main()