1"""Regression tests for urllib"""
2
3import collections
4import urllib
5import httplib
6import io
7import unittest
8import os
9import sys
10import mimetools
11import tempfile
12
13from test import test_support
14from base64 import b64encode
15
16
def hexescape(char):
    """Return the percent-escape of a single character.

    The result is '%' followed by the character's ordinal written in
    upper-case hexadecimal, zero-padded to at least two digits (the form
    RFC 2396 specifies).
    """
    return "%%%02X" % ord(char)
23
24
def fakehttp(fakedata):
    """Build an HTTPConnection subclass that serves canned data.

    connect() on the returned class does not open a real socket; it wraps
    *fakedata* in an in-memory stream instead.  The payload most recently
    passed to sendall() is stored on the class as ``buf`` and the stream
    created by the last connect() as ``fakesock`` so callers can inspect
    them afterwards.
    """
    class FakeSocket(io.BytesIO):
        # In-memory stand-in for a socket; reads return b"" once closed.

        def sendall(self, data):
            # Only the latest outgoing payload is kept.
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            # The stream acts as its own file object.
            return self

        def read(self, amt=None):
            return b"" if self.closed else io.BytesIO.read(self, amt)

        def readline(self, length=None):
            return b"" if self.closed else io.BytesIO.readline(self, length)

    class FakeHTTPConnection(httplib.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = ""

        def connect(self):
            sock = FakeSocket(self.fakedata)
            self.sock = sock
            self.__class__.fakesock = sock

    FakeHTTPConnection.fakedata = fakedata
    return FakeHTTPConnection
55
56
class FakeHTTPMixin(object):
    """Mixin that swaps httplib.HTTP's connection class for a canned fake."""

    def fakehttp(self, fakedata):
        """Make httplib.HTTP use a fake connection serving *fakedata*."""
        # The genuine connection class must currently be installed.
        assert httplib.HTTP._connection_class == httplib.HTTPConnection

        fake_connection_class = fakehttp(fakedata)
        httplib.HTTP._connection_class = fake_connection_class

    def unfakehttp(self):
        """Put the genuine HTTPConnection class back in place."""
        httplib.HTTP._connection_class = httplib.HTTPConnection
65
66
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        """Setup of a temp file to use for testing"""
        self.text = "test_urllib: %s\n" % self.__class__.__name__
        self.pathname = test_support.TESTFN
        with open(self.pathname, 'wb') as temp_file:
            temp_file.write(self.text)
        try:
            self.returned_obj = urllib.urlopen("file:%s" % self.pathname)
        except Exception:
            # tearDown() is not run when setUp() raises, so delete the temp
            # file here instead of leaving it behind.
            os.remove(self.pathname)
            raise

    def tearDown(self):
        """Shut down the open object and delete the temp file"""
        try:
            self.returned_obj.close()
        finally:
            # Delete the file even when close() raises.
            os.remove(test_support.TESTFN)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual('', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), mimetools.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        # Opening a local file yields no status code.
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison
        for line in self.returned_obj.__iter__():
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        # A relative path without a scheme is rejected with ValueError.
        self.assertRaises(ValueError, urllib.urlopen, './' + self.pathname)
146
class ProxyTests(unittest.TestCase):
    """Tests for proxy lookup and proxy bypass via environment variables."""

    def setUp(self):
        # The guard records changes to env vars so tearDown() can undo them.
        self.env = test_support.EnvironmentVarGuard()
        # Collect every proxy related variable name first, then unset each
        # one through the guard.
        proxy_names = [name for name in os.environ.keys()
                       if 'proxy' in name.lower()]
        for name in proxy_names:
            self.env.unset(name)

    def tearDown(self):
        # Restore all proxy related env vars
        guard = self.env
        guard.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        self.assertEqual('localhost', urllib.getproxies_environment()['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com:1234')
        for host in ('anotherdomain.com',
                     'anotherdomain.com:8888',
                     'newdomain.com:1234'):
            self.assertTrue(urllib.proxy_bypass_environment(host))

    def test_proxy_cgi_ignore(self):
        # HTTP_PROXY is honoured until REQUEST_METHOD is set, after which the
        # 'http' entry must disappear.
        env = self.env
        try:
            env.set('HTTP_PROXY', 'http://somewhere:3128')
            before_cgi = urllib.getproxies_environment()
            self.assertEqual('http://somewhere:3128', before_cgi['http'])
            env.set('REQUEST_METHOD', 'GET')
            during_cgi = urllib.getproxies_environment()
            self.assertNotIn('http', during_cgi)
        finally:
            env.unset('REQUEST_METHOD')
            env.unset('HTTP_PROXY')

    def test_proxy_bypass_environment_host_match(self):
        bypass = urllib.proxy_bypass_environment
        self.env.set('NO_PROXY',
            'localhost, anotherdomain.com, newdomain.com:1234')
        bypassed_hosts = (
            'localhost',
            'LocalHost',                  # MixedCase
            'LOCALHOST',                  # UPPERCASE
            'newdomain.com:1234',
            'anotherdomain.com:8888',
            'www.newdomain.com:1234',
        )
        proxied_hosts = (
            'prelocalhost',
            'newdomain.com',              # no port
            'newdomain.com:1235',         # wrong port
        )
        for host in bypassed_hosts:
            self.assertTrue(bypass(host))
        for host in proxied_hosts:
            self.assertFalse(bypass(host))
198
class ProxyTests_withOrderedEnv(unittest.TestCase):
    """Proxy environment tests where the order of variables is significant."""

    def setUp(self):
        # Monkey patch os.environ with an empty, insertion-ordered fake so
        # the test decides the order in which variables appear.
        self._saved_env = os.environ
        os.environ = collections.OrderedDict()

    def tearDown(self):
        # Put the real environment mapping back.
        os.environ = self._saved_env

    def test_getproxies_environment_prefer_lowercase(self):
        bypass = urllib.proxy_bypass_environment
        env = os.environ

        # Test lowercase preference with removal
        env['no_proxy'] = ''
        env['No_Proxy'] = 'localhost'
        for host in ('localhost', 'arbitrary'):
            self.assertFalse(bypass(host))
        env['http_proxy'] = ''
        env['HTTP_PROXY'] = 'http://somewhere:3128'
        self.assertEqual({}, urllib.getproxies_environment())

        # Test lowercase preference of proxy bypass and correct matching including ports
        env['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
        env['No_Proxy'] = 'xyz.com'
        for host in ('localhost', 'noproxy.com:5678', 'my.proxy:1234'):
            self.assertTrue(bypass(host))
        for host in ('my.proxy', 'arbitrary'):
            self.assertFalse(bypass(host))

        # Test lowercase preference with replacement
        env['http_proxy'] = 'http://somewhere:3128'
        env['Http_Proxy'] = 'http://somewhereelse:3128'
        found = urllib.getproxies_environment()
        self.assertEqual('http://somewhere:3128', found['http'])
233
234
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urlopen() opening a fake http connection."""

    def test_read(self):
        # The canned body comes back through readline(); a second readline()
        # yields the empty string.
        target = "http://python.org/"
        self.fakehttp('Hello!')
        try:
            response = urllib.urlopen(target)
            self.assertEqual(response.readline(), 'Hello!')
            self.assertEqual(response.readline(), '')
            self.assertEqual(response.geturl(), 'http://python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url_with_fragment = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp('Hello!')
        try:
            response = urllib.urlopen(url_with_fragment)
            self.assertEqual(response.geturl(), url_with_fragment)
        finally:
            self.unfakehttp()

    def test_read_bogus(self):
        # urlopen() should raise IOError for many error codes.
        canned_reply = '''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
'''
        self.fakehttp(canned_reply)
        try:
            with self.assertRaises(IOError):
                urllib.urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # A 302 whose Location is a file: URL must be refused with IOError.
        canned_reply = """HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file:README
Connection: close
Content-Type: text/html; charset=iso-8859-1
"""
        self.fakehttp(canned_reply)
        try:
            expected_message = "Redirection to url 'file:"
            with self.assertRaisesRegexp(IOError, expected_message):
                urllib.urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        attempts = urllib.FancyURLopener().maxtries
        for _ in range(attempts):
            self.fakehttp(b'''HTTP/1.1 302 Found
Location: file://guidocomputer.athome.com:/python/license
Connection: close
''')
            try:
                with self.assertRaises(IOError):
                    urllib.urlopen("http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises IOError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp('')
        try:
            with self.assertRaises(IOError):
                urllib.urlopen('http://something')
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # A file: URL is openable only while the file exists.
        with self.assertRaises(IOError):
            urllib.urlopen('file://localhost/a/missing/file.py')
        handle, temp_path = tempfile.mkstemp()
        temp_url = 'file://localhost/' + '/'.join(temp_path.split(os.path.sep))
        self.assertTrue(os.path.exists(temp_path))
        try:
            urllib.urlopen(temp_url).close()
        finally:
            os.close(handle)
            os.unlink(temp_path)

        self.assertFalse(os.path.exists(temp_path))
        with self.assertRaises(IOError):
            urllib.urlopen(temp_url)

    def test_ftp_nonexisting(self):
        with self.assertRaises(IOError):
            urllib.urlopen('ftp://localhost/not/existing/file.py')

    def test_userpass_inurl(self):
        self.fakehttp('Hello!')
        try:
            connection_class = httplib.HTTP._connection_class
            response = urllib.urlopen("http://user:pass@python.org/")
            credentials = b64encode('user:pass')
            expected_header = "Authorization: Basic %s\r\n" % credentials
            # The authorization header must be in place
            self.assertIn(expected_header, connection_class.buf)
            self.assertEqual(response.readline(), "Hello!")
            self.assertEqual(response.readline(), "")
            self.assertEqual(response.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_with_spaces_inurl(self):
        self.fakehttp('Hello!')
        try:
            url = "http://a b:c d@python.org/"
            connection_class = httplib.HTTP._connection_class
            credentials = b64encode('a b:c d')
            expected_header = "Authorization: Basic %s\r\n" % credentials
            response = urllib.urlopen(url)
            # The authorization header must be in place
            self.assertIn(expected_header, connection_class.buf)
            self.assertEqual(response.readline(), "Hello!")
            self.assertEqual(response.readline(), "")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(response.geturl(), url)
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()
365
366
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to makes sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(test_support.TESTFN)
        self.text = 'testing urllib.urlretrieve'
        with open(test_support.TESTFN, 'wb') as source_file:
            source_file.write(self.text)

    def tearDown(self):
        # Delete the temporary files.
        for each in self.tempFiles:
            try:
                os.remove(each)
            except OSError:
                # Best effort: a registered file may never have been created.
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for the absolute form of *filePath*."""
        return "file://%s" % urllib.pathname2url(os.path.abspath(filePath))

    def createNewTempFile(self, data=""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        self.registerFileForCleanUp(newFilePath)
        try:
            newFile = os.fdopen(newFd, "wb")
        except Exception:
            # No file object took ownership of the descriptor, so close it
            # directly rather than leaking it.
            os.close(newFd)
            raise
        with newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Remember *fileName* so tearDown() deletes it."""
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.urlretrieve("file:%s" % test_support.TESTFN)
        self.assertEqual(result[0], test_support.TESTFN)
        self.assertIsInstance(result[1], mimetools.Message,
                              "did not get a mimetools.Message instance as "
                              "second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % test_support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.urlretrieve(self.constructLocalFileUrl(
            test_support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as copied_file:
            text = copied_file.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        # One-element list so the nested hook can update the running count.
        expected_count = [0]

        def hooktester(count, block_size, total_size):
            self.assertIsInstance(count, int)
            self.assertIsInstance(block_size, int)
            self.assertIsInstance(total_size, int)
            self.assertEqual(count, expected_count[0])
            expected_count[0] += 1

        second_temp = "%s.2" % test_support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.urlretrieve(self.constructLocalFileUrl(test_support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile()
        urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
            test_support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read). Since the block size is 8192 bytes, only one block read is
        # required to read the entire file.
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile("x" * 5)
        urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
            test_support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []

        def hooktester(count, block_size, total_size):
            report.append((count, block_size, total_size))

        srcFileName = self.createNewTempFile("x" * 8193)
        urllib.urlretrieve(self.constructLocalFileUrl(srcFileName),
            test_support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[0][2], 8193)
498
499
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    def test_short_content_raises_ContentTooShortError(self):
        # The reply announces Content-Length: 100 but its body is far shorter.
        truncated_reply = '''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
'''
        self.fakehttp(truncated_reply)

        def ignore_progress(block_count, block_size, total_size):
            pass

        try:
            with self.assertRaises(urllib.ContentTooShortError):
                urllib.urlretrieve('http://example.com',
                                   reporthook=ignore_progress)
        finally:
            self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        # Same truncated reply, retrieved without passing a reporthook.
        truncated_reply = '''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
'''
        self.fakehttp(truncated_reply)
        try:
            with self.assertRaises(urllib.ContentTooShortError):
                urllib.urlretrieve('http://example.com/')
        finally:
            self.unfakehttp()
537
class QuotingTests(unittest.TestCase):
    r"""Tests for urllib.quote() and urllib.quote_plus()

    According to RFC 2396 ("Uniform Resource Identifiers"), to escape a
    character you write it as '%' + <2 character US-ASCII hex value>.  The Python
    code of ``'%' + hex(ord(<character>))[2:]`` escapes a character properly.
    Case does not matter on the hex letters.

    The various character sets specified are:

    Reserved characters : ";/?:@&=+$,"
        Have special meaning in URIs and must be escaped if not being used for
        their special meaning
    Data characters : letters, digits, and "-_.!~*'()"
        Unreserved and do not need to be escaped; can be, though, if desired
    Control characters : 0x00 - 0x1F, 0x7F
        Have no use in URIs so must be escaped
    space : 0x20
        Must be escaped
    Delimiters : '<>#%"'
        Must be escaped
    Unwise : "{}|\^[]`"
        Must be escaped

    """

    def test_never_quote(self):
        # Make sure quote() does not quote letters, digits, and "_.-"
        do_not_quote = ''.join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
                                "abcdefghijklmnopqrstuvwxyz",
                                "0123456789",
                                "_.-"])
        result = urllib.quote(do_not_quote)
        self.assertEqual(do_not_quote, result,
                         "using quote(): %s != %s" % (do_not_quote, result))
        result = urllib.quote_plus(do_not_quote)
        self.assertEqual(do_not_quote, result,
                        "using quote_plus(): %s != %s" % (do_not_quote, result))

    def test_default_safe(self):
        # Test '/' is default value for 'safe' parameter
        self.assertEqual(urllib.quote.__defaults__[0], '/')

    def test_safe(self):
        # Test setting 'safe' parameter does what it should do
        quote_by_default = "<>"
        result = urllib.quote(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote(): %s != %s" % (quote_by_default, result))
        result = urllib.quote_plus(quote_by_default, safe=quote_by_default)
        self.assertEqual(quote_by_default, result,
                         "using quote_plus(): %s != %s" %
                         (quote_by_default, result))

    def test_default_quoting(self):
        # Make sure all characters that should be quoted are by default sans
        # space (separate test for that).
        should_quote = [chr(num) for num in range(32)] # For 0x00 - 0x1F
        # Raw string: the backslash is one of the characters under test, not
        # the start of an escape sequence.
        should_quote.append(r'<>#%"{}|\^[]`')
        should_quote.append(chr(127)) # For 0x7F
        should_quote = ''.join(should_quote)
        for char in should_quote:
            result = urllib.quote(char)
            self.assertEqual(hexescape(char), result,
                             "using quote(): %s should be escaped to %s, not %s" %
                             (char, hexescape(char), result))
            result = urllib.quote_plus(char)
            self.assertEqual(hexescape(char), result,
                             "using quote_plus(): "
                             "%s should be escapes to %s, not %s" %
                             (char, hexescape(char), result))
        del should_quote
        partial_quote = "ab[]cd"
        expected = "ab%5B%5Dcd"
        result = urllib.quote(partial_quote)
        self.assertEqual(expected, result,
                         "using quote(): %s != %s" % (expected, result))
        result = urllib.quote_plus(partial_quote)
        self.assertEqual(expected, result,
                         "using quote_plus(): %s != %s" % (expected, result))
        self.assertRaises(TypeError, urllib.quote, None)

    def test_quoting_space(self):
        # Make sure quote() and quote_plus() handle spaces as specified in
        # their unique way
        result = urllib.quote(' ')
        self.assertEqual(result, hexescape(' '),
                         "using quote(): %s != %s" % (result, hexescape(' ')))
        result = urllib.quote_plus(' ')
        self.assertEqual(result, '+',
                         "using quote_plus(): %s != +" % result)
        given = "a b cd e f"
        expect = given.replace(' ', hexescape(' '))
        result = urllib.quote(given)
        self.assertEqual(expect, result,
                         "using quote(): %s != %s" % (expect, result))
        expect = given.replace(' ', '+')
        result = urllib.quote_plus(given)
        self.assertEqual(expect, result,
                         "using quote_plus(): %s != %s" % (expect, result))

    def test_quoting_plus(self):
        # A literal '+' is escaped by default but kept when listed as safe;
        # spaces become '+' either way.
        self.assertEqual(urllib.quote_plus('alpha+beta gamma'),
                         'alpha%2Bbeta+gamma')
        self.assertEqual(urllib.quote_plus('alpha+beta gamma', '+'),
                         'alpha+beta+gamma')
644
class UnquotingTests(unittest.TestCase):
    """Tests for unquote() and unquote_plus()

    See the doc string for QuotingTests for details on quoting and such.

    """

    def test_unquoting(self):
        # Make sure unquoting of all ASCII values works
        escape_list = []
        for num in range(128):
            given = hexescape(chr(num))
            expect = chr(num)
            result = urllib.unquote(given)
            self.assertEqual(expect, result,
                             "using unquote(): %s != %s" % (expect, result))
            result = urllib.unquote_plus(given)
            self.assertEqual(expect, result,
                             "using unquote_plus(): %s != %s" %
                             (expect, result))
            escape_list.append(given)
        escape_string = ''.join(escape_list)
        del escape_list
        # After decoding, the only '%' left is the one produced by '%25'.
        result = urllib.unquote(escape_string)
        self.assertEqual(result.count('%'), 1,
                         "using unquote(): not all characters escaped: "
                         "%s" % result)
        # Run the same check through unquote_plus() so both functions are
        # covered on the joined string.
        result = urllib.unquote_plus(escape_string)
        self.assertEqual(result.count('%'), 1,
                         "using unquote_plus(): not all characters escaped: "
                         "%s" % result)

    def test_unquoting_badpercent(self):
        # Test unquoting on bad percent-escapes
        given = '%xab'
        expect = given
        result = urllib.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))
        given = '%x'
        expect = given
        result = urllib.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))
        given = '%'
        expect = given
        result = urllib.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))

    def test_unquoting_mixed_case(self):
        # Test unquoting on mixed-case hex digits in the percent-escapes
        given = '%Ab%eA'
        expect = '\xab\xea'
        result = urllib.unquote(given)
        self.assertEqual(expect, result, "using unquote(): %r != %r"
                         % (expect, result))

    def test_unquoting_parts(self):
        # Make sure unquoting works when have non-quoted characters
        # interspersed
        given = 'ab%sd' % hexescape('c')
        expect = "abcd"
        result = urllib.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %s != %s" % (expect, result))
        result = urllib.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %s != %s" % (expect, result))

    def test_unquoting_plus(self):
        # Test difference between unquote() and unquote_plus()
        given = "are+there+spaces..."
        expect = given
        result = urllib.unquote(given)
        self.assertEqual(expect, result,
                         "using unquote(): %s != %s" % (expect, result))
        expect = given.replace('+', ' ')
        result = urllib.unquote_plus(given)
        self.assertEqual(expect, result,
                         "using unquote_plus(): %s != %s" % (expect, result))

    def test_unquote_with_unicode(self):
        # Escapes in a unicode string decode to the code points of the
        # individual bytes.
        r = urllib.unquote(u'br%C3%BCckner_sapporo_20050930.doc')
        self.assertEqual(r, u'br\xc3\xbcckner_sapporo_20050930.doc')
730
class urlencode_Tests(unittest.TestCase):
    """Tests for urlencode()"""

    def help_inputtype(self, given, test_type):
        """Check that *given* encodes to the three expected pairs.

        'given' must lead to only the pairs:
            * 1st, 1
            * 2nd, 2
            * 3rd, 3

        No assumption is made about the order of the pairs, since the input
        may be a dictionary.  'test_type' is only used to label failure
        messages.

        """
        encoded = urllib.urlencode(given)
        for pair in ("1st=1", "2nd=2", "3rd=3"):
            self.assertIn(pair, encoded,
                          "testing %s: %s not found in %s" %
                          (test_type, pair, encoded))
        amp_total = encoded.count('&')
        self.assertEqual(amp_total, 2,
                         "testing %s: expected 2 '&'s; got %s" %
                         (test_type, amp_total))
        # The first '&' must sit between the digit ending one pair and the
        # digit starting the next.
        first_amp = encoded.index('&')
        char_before = encoded[first_amp - 1]
        char_after = encoded[first_amp + 1]
        self.assertTrue(char_before.isdigit() and char_after.isdigit(),
                        "testing %s: '&' not located in proper place in %s" %
                        (test_type, encoded))
        # 5 chars per thing and amps
        expected_length = (5 * 3) + 2
        self.assertEqual(len(encoded), expected_length,
                         "testing %s: "
                         "unexpected number of characters: %s != %s" %
                         (test_type, len(encoded), expected_length))

    def test_using_mapping(self):
        # Test passing in a mapping object as an argument.
        mapping = {"1st": '1', "2nd": '2', "3rd": '3'}
        self.help_inputtype(mapping, "using dict as input type")

    def test_using_sequence(self):
        # Test passing in a sequence of two-item sequences as an argument.
        pairs = [('1st', '1'), ('2nd', '2'), ('3rd', '3')]
        self.help_inputtype(pairs,
                            "using sequence of two-item tuples as input")

    def test_quoting(self):
        # Make sure keys and values are quoted using quote_plus()
        cases = (
            ({"&": "="}, "%s=%s" % (hexescape('&'), hexescape('='))),
            ({"key name": "A bunch of pluses"}, "key+name=A+bunch+of+pluses"),
        )
        for given, expect in cases:
            self.assertEqual(expect, urllib.urlencode(given))

    def test_doseq(self):
        # Test that passing True for 'doseq' parameter works correctly
        values = ['1', '2', '3']
        given = {'sequence': values}
        # Without doseq the list is encoded as its str() form.
        whole_list = "sequence=%s" % urllib.quote_plus(str(values))
        self.assertEqual(whole_list, urllib.urlencode(given))
        # With doseq every element becomes its own key=value pair.
        expanded = urllib.urlencode(given, True)
        for value in values:
            self.assertIn("sequence=%s" % value, expanded)
        self.assertEqual(expanded.count('&'), 2,
                         "Expected 2 '&'s, got %s" % expanded.count('&'))
799
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # A plain relative path must convert to a '/'-separated URL and the
        # URL must convert back to the same local path.
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.quote("quot=ing")
        result = urllib.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Round trip: the quoted URL must unquote back to the original path.
        expect = given
        result = urllib.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        # A space in a path component is quoted the way quote() does it.
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.quote("make sure")
        result = urllib.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # '+' must survive url2pathname() unchanged (plain unquoting, not
        # the unquote_plus() treatment).
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the nturl2path library')
    def test_ntpath(self):
        # Several spellings of the drive root must all map to 'C:\'.
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.url2pathname(url)
            self.assertEqual(expect, result,
                             'nturl2path.url2pathname() failed; %s != %s' %
                             (expect, result))
        # The 'C|' drive spelling followed by a path component.
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.url2pathname(given)
        self.assertEqual(expect, result,
                         'nturl2path.url2pathname() failed; %s != %s' %
                         (expect, result))
859
class Utility_Tests(unittest.TestCase):
    """Checks for the small URL-splitting helpers exposed by urllib."""
    # In Python 3 this test class is moved to test_urlparse.

    def _check_table(self, func, table):
        """Assert that func(argument) == expected for each table row."""
        for argument, expected in table:
            self.assertEqual(func(argument), expected)

    def test_splittype(self):
        self._check_table(urllib.splittype, [
            ('type:opaquestring', ('type', 'opaquestring')),
            ('opaquestring', (None, 'opaquestring')),
            (':opaquestring', (None, ':opaquestring')),
            ('type:', ('type', '')),
            ('type:opaque:string', ('type', 'opaque:string')),
        ])

    def test_splithost(self):
        self._check_table(urllib.splithost, [
            ('//www.example.org:80/foo/bar/baz.html',
             ('www.example.org:80', '/foo/bar/baz.html')),
            ('//www.example.org:80', ('www.example.org:80', '')),
            ('/foo/bar/baz.html', (None, '/foo/bar/baz.html')),
        ])

    def test_splituser(self):
        self._check_table(urllib.splituser, [
            ('User:Pass@www.python.org:080',
             ('User:Pass', 'www.python.org:080')),
            ('@www.python.org:080', ('', 'www.python.org:080')),
            ('www.python.org:080', (None, 'www.python.org:080')),
            ('User:Pass@', ('User:Pass', '')),
            ('User@example.com:Pass@www.python.org:080',
             ('User@example.com:Pass', 'www.python.org:080')),
        ])

    def test_splitpasswd(self):
        # Some of these passwords are not sensible, but they are included to
        # conform to RFC2617 and to address issue4675.
        self._check_table(urllib.splitpasswd, [
            ('user:ab', ('user', 'ab')),
            ('user:a\nb', ('user', 'a\nb')),
            ('user:a\tb', ('user', 'a\tb')),
            ('user:a\rb', ('user', 'a\rb')),
            ('user:a\fb', ('user', 'a\fb')),
            ('user:a\vb', ('user', 'a\vb')),
            ('user:a:b', ('user', 'a:b')),
            ('user:a b', ('user', 'a b')),
            ('user 2:ab', ('user 2', 'ab')),
            ('user+1:a+b', ('user+1', 'a+b')),
            ('user:', ('user', '')),
            ('user', ('user', None)),
            (':ab', ('', 'ab')),
        ])

    def test_splitport(self):
        self._check_table(urllib.splitport, [
            ('parrot:88', ('parrot', '88')),
            ('parrot', ('parrot', None)),
            ('parrot:', ('parrot', None)),
            ('127.0.0.1', ('127.0.0.1', None)),
            ('parrot:cheese', ('parrot:cheese', None)),
            ('[::1]:88', ('[::1]', '88')),
            ('[::1]', ('[::1]', None)),
            (':88', ('', '88')),
        ])

    def test_splitnport(self):
        # Each row is (positional arguments, expected result).  Judging by
        # the rows, the optional second argument comes back as the port when
        # the host carries none.
        table = [
            (('parrot:88',), ('parrot', 88)),
            (('parrot',), ('parrot', -1)),
            (('parrot', 55), ('parrot', 55)),
            (('parrot:',), ('parrot', -1)),
            (('parrot:', 55), ('parrot', 55)),
            (('127.0.0.1',), ('127.0.0.1', -1)),
            (('127.0.0.1', 55), ('127.0.0.1', 55)),
            (('parrot:cheese',), ('parrot', None)),
            (('parrot:cheese', 55), ('parrot', None)),
        ]
        for arguments, expected in table:
            self.assertEqual(urllib.splitnport(*arguments), expected)

    def test_splitquery(self):
        # Normal cases are exercised by other tests; ensure that we also
        # catch cases with no port specified (testcase ensuring coverage)
        self._check_table(urllib.splitquery, [
            ('http://python.org/fake?foo=bar',
             ('http://python.org/fake', 'foo=bar')),
            ('http://python.org/fake?foo=bar?',
             ('http://python.org/fake?foo=bar', '')),
            ('http://python.org/fake', ('http://python.org/fake', None)),
            ('?foo=bar', ('', 'foo=bar')),
        ])

    def test_splittag(self):
        self._check_table(urllib.splittag, [
            ('http://example.com?foo=bar#baz',
             ('http://example.com?foo=bar', 'baz')),
            ('http://example.com?foo=bar#',
             ('http://example.com?foo=bar', '')),
            ('#baz', ('', 'baz')),
            ('http://example.com?foo=bar',
             ('http://example.com?foo=bar', None)),
            ('http://example.com?foo=bar#baz#boo',
             ('http://example.com?foo=bar#baz', 'boo')),
        ])

    def test_splitattr(self):
        self._check_table(urllib.splitattr, [
            ('/path;attr1=value1;attr2=value2',
             ('/path', ['attr1=value1', 'attr2=value2'])),
            ('/path;', ('/path', [''])),
            (';attr1=value1;attr2=value2',
             ('', ['attr1=value1', 'attr2=value2'])),
            ('/path', ('/path', [])),
        ])

    def test_splitvalue(self):
        # Normal cases are exercised by other tests; test pathological cases
        # with no key/value pairs. (testcase ensuring coverage)
        self._check_table(urllib.splitvalue, [
            ('foo=bar', ('foo', 'bar')),
            ('foo=', ('foo', '')),
            ('=bar', ('', 'bar')),
            ('foobar', ('foobar', None)),
            ('foo=bar=baz', ('foo', 'bar=baz')),
        ])

    def test_toBytes(self):
        # An ASCII-only unicode URL converts; a non-ASCII one must raise.
        converted = urllib.toBytes(u'http://www.python.org')
        self.assertEqual(converted, 'http://www.python.org')
        non_ascii = test_support.u(r'http://www.python.org/medi\u00e6val')
        with self.assertRaises(UnicodeError):
            urllib.toBytes(non_ascii)

    def test_unwrap(self):
        # The '<URL:...>' wrapper is stripped, leaving the bare URL.
        self.assertEqual(urllib.unwrap('<URL:type://host/path>'),
                         'type://host/path')
987
988
class URLopener_Tests(unittest.TestCase):
    """Check how URLopener.open() quotes a URL before dispatching it."""

    def test_quoted_open(self):
        class DummyURLopener(urllib.URLopener):
            # Handler for the made-up 'spam' scheme: echo back whatever
            # open() passes in so the test can inspect it.
            def open_spam(self, url):
                return url

        cases = (
            # A space in the URL is expected to arrive quoted as %20.
            ('spam://example/ /', '//example/%20/'),
            # test the safe characters are not quoted by urlopen
            ("spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/",
             "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/"),
        )
        for url, expected in cases:
            self.assertEqual(DummyURLopener().open(url), expected)
1004
1005
# The FTP wrapper tests below are commented out (disabled).
# It is not clear why they keep failing on Windows and SPARC machines.
# Everywhere else they work fine, but on those machines the failure shows
# up sometimes in one test and sometimes in another. On my Linux box all
# of the tests pass.
# If anybody has one of the problematic environments, please help!
# .   Facundo
1013#
1014# def server(evt):
1015#     import socket, time
1016#     serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1017#     serv.settimeout(3)
1018#     serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1019#     serv.bind(("", 9093))
1020#     serv.listen(5)
1021#     try:
1022#         conn, addr = serv.accept()
1023#         conn.send("1 Hola mundo\n")
1024#         cantdata = 0
1025#         while cantdata < 13:
1026#             data = conn.recv(13-cantdata)
1027#             cantdata += len(data)
1028#             time.sleep(.3)
1029#         conn.send("2 No more lines\n")
1030#         conn.close()
1031#     except socket.timeout:
1032#         pass
1033#     finally:
1034#         serv.close()
1035#         evt.set()
1036#
1037# class FTPWrapperTests(unittest.TestCase):
1038#
1039#     def setUp(self):
1040#         import ftplib, time, threading
1041#         ftplib.FTP.port = 9093
1042#         self.evt = threading.Event()
1043#         threading.Thread(target=server, args=(self.evt,)).start()
1044#         time.sleep(.1)
1045#
1046#     def tearDown(self):
1047#         self.evt.wait()
1048#
1049#     def testBasic(self):
1050#         # connects
1051#         ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1052#         ftp.close()
1053#
1054#     def testTimeoutNone(self):
1055#         # global default timeout is ignored
1056#         import socket
1057#         self.assertIsNone(socket.getdefaulttimeout())
1058#         socket.setdefaulttimeout(30)
1059#         try:
1060#             ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1061#         finally:
1062#             socket.setdefaulttimeout(None)
1063#         self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1064#         ftp.close()
1065#
1066#     def testTimeoutDefault(self):
1067#         # global default timeout is used
1068#         import socket
1069#         self.assertIsNone(socket.getdefaulttimeout())
1070#         socket.setdefaulttimeout(30)
1071#         try:
1072#             ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1073#         finally:
1074#             socket.setdefaulttimeout(None)
1075#         self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1076#         ftp.close()
1077#
1078#     def testTimeoutValue(self):
1079#         ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1080#                                 timeout=30)
1081#         self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1082#         ftp.close()
1083
1084
1085
def test_main():
    """Run every urllib test class in this module through test_support.

    DeprecationWarnings whose message matches the urllib.urlopen /
    "Python 3.0" pattern are ignored while the tests run; the previous
    warning filters are restored afterwards by catch_warnings().
    """
    import warnings
    with warnings.catch_warnings():
        # Raw string: '\.' is meant as a regex escape, not a string escape.
        warnings.filterwarnings('ignore', r".*urllib\.urlopen.*Python 3.0",
                                DeprecationWarning)
        # Each class is listed exactly once so no test runs twice.
        test_support.run_unittest(
            urlopen_FileTests,
            urlopen_HttpTests,
            urlretrieve_FileTests,
            urlretrieve_HttpTests,
            ProxyTests,
            QuotingTests,
            UnquotingTests,
            urlencode_Tests,
            Pathname_Tests,
            Utility_Tests,
            URLopener_Tests,
            ProxyTests_withOrderedEnv,
            #FTPWrapperTests,
        )
1107
1108
1109
# Run the whole suite when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()
1112