test_cgi.py revision d90f8d10e088657593fa753ecacab95845d378aa
1from test.support import run_unittest, check_warnings
2import cgi
3import os
4import sys
5import tempfile
6import unittest
7import warnings
8from collections import namedtuple
9from io import StringIO, BytesIO
10
11class HackedSysModule:
12    # The regression test will have real values in sys.argv, which
13    # will completely confuse the test of the cgi module
14    argv = []
15    stdin = sys.stdin
16
17cgi.sys = HackedSysModule()
18
19class ComparableException:
20    def __init__(self, err):
21        self.err = err
22
23    def __str__(self):
24        return str(self.err)
25
26    def __eq__(self, anExc):
27        if not isinstance(anExc, Exception):
28            return NotImplemented
29        return (self.err.__class__ == anExc.__class__ and
30                self.err.args == anExc.args)
31
32    def __getattr__(self, attr):
33        return getattr(self.err, attr)
34
35def do_test(buf, method):
36    env = {}
37    if method == "GET":
38        fp = None
39        env['REQUEST_METHOD'] = 'GET'
40        env['QUERY_STRING'] = buf
41    elif method == "POST":
42        fp = BytesIO(buf.encode('latin-1')) # FieldStorage expects bytes
43        env['REQUEST_METHOD'] = 'POST'
44        env['CONTENT_TYPE'] = 'application/x-www-form-urlencoded'
45        env['CONTENT_LENGTH'] = str(len(buf))
46    else:
47        raise ValueError("unknown method: %s" % method)
48    try:
49        return cgi.parse(fp, env, strict_parsing=1)
50    except Exception as err:
51        return ComparableException(err)
52
53parse_strict_test_cases = [
54    ("", ValueError("bad query field: ''")),
55    ("&", ValueError("bad query field: ''")),
56    ("&&", ValueError("bad query field: ''")),
57    (";", ValueError("bad query field: ''")),
58    (";&;", ValueError("bad query field: ''")),
59    # Should the next few really be valid?
60    ("=", {}),
61    ("=&=", {}),
62    ("=;=", {}),
63    # This rest seem to make sense
64    ("=a", {'': ['a']}),
65    ("&=a", ValueError("bad query field: ''")),
66    ("=a&", ValueError("bad query field: ''")),
67    ("=&a", ValueError("bad query field: 'a'")),
68    ("b=a", {'b': ['a']}),
69    ("b+=a", {'b ': ['a']}),
70    ("a=b=a", {'a': ['b=a']}),
71    ("a=+b=a", {'a': [' b=a']}),
72    ("&b=a", ValueError("bad query field: ''")),
73    ("b&=a", ValueError("bad query field: 'b'")),
74    ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
75    ("a=a+b&a=b+a", {'a': ['a b', 'b a']}),
76    ("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
77    ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
78    ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
79    ("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env",
80     {'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'],
81      'cuyer': ['r'],
82      'expire': ['964546263'],
83      'kid': ['130003.300038'],
84      'lobale': ['en-US'],
85      'order_id': ['0bb2e248638833d48cb7fed300000f1b'],
86      'ss': ['env'],
87      'view': ['bustomer'],
88      }),
89
90    ("group_id=5470&set=custom&_assigned_to=31392&_status=1&_category=100&SUBMIT=Browse",
91     {'SUBMIT': ['Browse'],
92      '_assigned_to': ['31392'],
93      '_category': ['100'],
94      '_status': ['1'],
95      'group_id': ['5470'],
96      'set': ['custom'],
97      })
98    ]
99
100def norm(seq):
101    return sorted(seq, key=repr)
102
103def first_elts(list):
104    return [p[0] for p in list]
105
106def first_second_elts(list):
107    return [(p[0], p[1][0]) for p in list]
108
109def gen_result(data, environ):
110    encoding = 'latin-1'
111    fake_stdin = BytesIO(data.encode(encoding))
112    fake_stdin.seek(0)
113    form = cgi.FieldStorage(fp=fake_stdin, environ=environ, encoding=encoding)
114
115    result = {}
116    for k, v in dict(form).items():
117        result[k] = isinstance(v, list) and form.getlist(k) or v.value
118
119    return result
120
121class CgiTests(unittest.TestCase):
122
123    def test_parse_multipart(self):
124        fp = BytesIO(POSTDATA.encode('latin1'))
125        env = {'boundary': BOUNDARY.encode('latin1'),
126               'CONTENT-LENGTH': '558'}
127        result = cgi.parse_multipart(fp, env)
128        expected = {'submit': [b' Add '], 'id': [b'1234'],
129                    'file': [b'Testing 123.\n'], 'title': [b'']}
130        self.assertEqual(result, expected)
131
132    def test_fieldstorage_properties(self):
133        fs = cgi.FieldStorage()
134        self.assertFalse(fs)
135        self.assertIn("FieldStorage", repr(fs))
136        self.assertEqual(list(fs), list(fs.keys()))
137        fs.list.append(namedtuple('MockFieldStorage', 'name')('fieldvalue'))
138        self.assertTrue(fs)
139
140    def test_fieldstorage_invalid(self):
141        self.assertRaises(TypeError, cgi.FieldStorage, "not-a-file-obj",
142                                                            environ={"REQUEST_METHOD":"PUT"})
143        self.assertRaises(TypeError, cgi.FieldStorage, "foo", "bar")
144        fs = cgi.FieldStorage(headers={'content-type':'text/plain'})
145        self.assertRaises(TypeError, bool, fs)
146
147    def test_escape(self):
148        # cgi.escape() is deprecated.
149        with warnings.catch_warnings():
150            warnings.filterwarnings('ignore', 'cgi\.escape',
151                                     DeprecationWarning)
152            self.assertEqual("test & string", cgi.escape("test & string"))
153            self.assertEqual("&lt;test string&gt;", cgi.escape("<test string>"))
154            self.assertEqual("&quot;test string&quot;", cgi.escape('"test string"', True))
155
156    def test_strict(self):
157        for orig, expect in parse_strict_test_cases:
158            # Test basic parsing
159            d = do_test(orig, "GET")
160            self.assertEqual(d, expect, "Error parsing %s method GET" % repr(orig))
161            d = do_test(orig, "POST")
162            self.assertEqual(d, expect, "Error parsing %s method POST" % repr(orig))
163
164            env = {'QUERY_STRING': orig}
165            fs = cgi.FieldStorage(environ=env)
166            if isinstance(expect, dict):
167                # test dict interface
168                self.assertEqual(len(expect), len(fs))
169                self.assertCountEqual(expect.keys(), fs.keys())
170                ##self.assertEqual(norm(expect.values()), norm(fs.values()))
171                ##self.assertEqual(norm(expect.items()), norm(fs.items()))
172                self.assertEqual(fs.getvalue("nonexistent field", "default"), "default")
173                # test individual fields
174                for key in expect.keys():
175                    expect_val = expect[key]
176                    self.assertIn(key, fs)
177                    if len(expect_val) > 1:
178                        self.assertEqual(fs.getvalue(key), expect_val)
179                    else:
180                        self.assertEqual(fs.getvalue(key), expect_val[0])
181
182    def test_log(self):
183        cgi.log("Testing")
184
185        cgi.logfp = StringIO()
186        cgi.initlog("%s", "Testing initlog 1")
187        cgi.log("%s", "Testing log 2")
188        self.assertEqual(cgi.logfp.getvalue(), "Testing initlog 1\nTesting log 2\n")
189        if os.path.exists(os.devnull):
190            cgi.logfp = None
191            cgi.logfile = os.devnull
192            cgi.initlog("%s", "Testing log 3")
193            self.addCleanup(cgi.closelog)
194            cgi.log("Testing log 4")
195
196    def test_fieldstorage_readline(self):
197        # FieldStorage uses readline, which has the capacity to read all
198        # contents of the input file into memory; we use readline's size argument
199        # to prevent that for files that do not contain any newlines in
200        # non-GET/HEAD requests
201        class TestReadlineFile:
202            def __init__(self, file):
203                self.file = file
204                self.numcalls = 0
205
206            def readline(self, size=None):
207                self.numcalls += 1
208                if size:
209                    return self.file.readline(size)
210                else:
211                    return self.file.readline()
212
213            def __getattr__(self, name):
214                file = self.__dict__['file']
215                a = getattr(file, name)
216                if not isinstance(a, int):
217                    setattr(self, name, a)
218                return a
219
220        f = TestReadlineFile(tempfile.TemporaryFile("wb+"))
221        self.addCleanup(f.close)
222        f.write(b'x' * 256 * 1024)
223        f.seek(0)
224        env = {'REQUEST_METHOD':'PUT'}
225        fs = cgi.FieldStorage(fp=f, environ=env)
226        self.addCleanup(fs.file.close)
227        # if we're not chunking properly, readline is only called twice
228        # (by read_binary); if we are chunking properly, it will be called 5 times
229        # as long as the chunksize is 1 << 16.
230        self.assertGreater(f.numcalls, 2)
231        f.close()
232
233    def test_fieldstorage_multipart(self):
234        #Test basic FieldStorage multipart parsing
235        env = {
236            'REQUEST_METHOD': 'POST',
237            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
238            'CONTENT_LENGTH': '558'}
239        fp = BytesIO(POSTDATA.encode('latin-1'))
240        fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
241        self.assertEqual(len(fs.list), 4)
242        expect = [{'name':'id', 'filename':None, 'value':'1234'},
243                  {'name':'title', 'filename':None, 'value':''},
244                  {'name':'file', 'filename':'test.txt', 'value':b'Testing 123.\n'},
245                  {'name':'submit', 'filename':None, 'value':' Add '}]
246        for x in range(len(fs.list)):
247            for k, exp in expect[x].items():
248                got = getattr(fs.list[x], k)
249                self.assertEqual(got, exp)
250
251    def test_fieldstorage_multipart_leading_whitespace(self):
252        env = {
253            'REQUEST_METHOD': 'POST',
254            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
255            'CONTENT_LENGTH': '560'}
256        # Add some leading whitespace to our post data that will cause the
257        # first line to not be the innerboundary.
258        fp = BytesIO(b"\r\n" + POSTDATA.encode('latin-1'))
259        fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
260        self.assertEqual(len(fs.list), 4)
261        expect = [{'name':'id', 'filename':None, 'value':'1234'},
262                  {'name':'title', 'filename':None, 'value':''},
263                  {'name':'file', 'filename':'test.txt', 'value':b'Testing 123.\n'},
264                  {'name':'submit', 'filename':None, 'value':' Add '}]
265        for x in range(len(fs.list)):
266            for k, exp in expect[x].items():
267                got = getattr(fs.list[x], k)
268                self.assertEqual(got, exp)
269
270    def test_fieldstorage_multipart_non_ascii(self):
271        #Test basic FieldStorage multipart parsing
272        env = {'REQUEST_METHOD':'POST',
273            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
274            'CONTENT_LENGTH':'558'}
275        for encoding in ['iso-8859-1','utf-8']:
276            fp = BytesIO(POSTDATA_NON_ASCII.encode(encoding))
277            fs = cgi.FieldStorage(fp, environ=env,encoding=encoding)
278            self.assertEqual(len(fs.list), 1)
279            expect = [{'name':'id', 'filename':None, 'value':'\xe7\xf1\x80'}]
280            for x in range(len(fs.list)):
281                for k, exp in expect[x].items():
282                    got = getattr(fs.list[x], k)
283                    self.assertEqual(got, exp)
284
285    def test_fieldstorage_multipart_maxline(self):
286        # Issue #18167
287        maxline = 1 << 16
288        self.maxDiff = None
289        def check(content):
290            data = """---123
291Content-Disposition: form-data; name="upload"; filename="fake.txt"
292Content-Type: text/plain
293
294%s
295---123--
296""".replace('\n', '\r\n') % content
297            environ = {
298                'CONTENT_LENGTH':   str(len(data)),
299                'CONTENT_TYPE':     'multipart/form-data; boundary=-123',
300                'REQUEST_METHOD':   'POST',
301            }
302            self.assertEqual(gen_result(data, environ),
303                             {'upload': content.encode('latin1')})
304        check('x' * (maxline - 1))
305        check('x' * (maxline - 1) + '\r')
306        check('x' * (maxline - 1) + '\r' + 'y' * (maxline - 1))
307
308    def test_fieldstorage_multipart_w3c(self):
309        # Test basic FieldStorage multipart parsing (W3C sample)
310        env = {
311            'REQUEST_METHOD': 'POST',
312            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY_W3),
313            'CONTENT_LENGTH': str(len(POSTDATA_W3))}
314        fp = BytesIO(POSTDATA_W3.encode('latin-1'))
315        fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
316        self.assertEqual(len(fs.list), 2)
317        self.assertEqual(fs.list[0].name, 'submit-name')
318        self.assertEqual(fs.list[0].value, 'Larry')
319        self.assertEqual(fs.list[1].name, 'files')
320        files = fs.list[1].value
321        self.assertEqual(len(files), 2)
322        expect = [{'name': None, 'filename': 'file1.txt', 'value': b'... contents of file1.txt ...'},
323                  {'name': None, 'filename': 'file2.gif', 'value': b'...contents of file2.gif...'}]
324        for x in range(len(files)):
325            for k, exp in expect[x].items():
326                got = getattr(files[x], k)
327                self.assertEqual(got, exp)
328
329    _qs_result = {
330        'key1': 'value1',
331        'key2': ['value2x', 'value2y'],
332        'key3': 'value3',
333        'key4': 'value4'
334    }
335    def testQSAndUrlEncode(self):
336        data = "key2=value2x&key3=value3&key4=value4"
337        environ = {
338            'CONTENT_LENGTH':   str(len(data)),
339            'CONTENT_TYPE':     'application/x-www-form-urlencoded',
340            'QUERY_STRING':     'key1=value1&key2=value2y',
341            'REQUEST_METHOD':   'POST',
342        }
343        v = gen_result(data, environ)
344        self.assertEqual(self._qs_result, v)
345
346    def testQSAndFormData(self):
347        data = """---123
348Content-Disposition: form-data; name="key2"
349
350value2y
351---123
352Content-Disposition: form-data; name="key3"
353
354value3
355---123
356Content-Disposition: form-data; name="key4"
357
358value4
359---123--
360"""
361        environ = {
362            'CONTENT_LENGTH':   str(len(data)),
363            'CONTENT_TYPE':     'multipart/form-data; boundary=-123',
364            'QUERY_STRING':     'key1=value1&key2=value2x',
365            'REQUEST_METHOD':   'POST',
366        }
367        v = gen_result(data, environ)
368        self.assertEqual(self._qs_result, v)
369
370    def testQSAndFormDataFile(self):
371        data = """---123
372Content-Disposition: form-data; name="key2"
373
374value2y
375---123
376Content-Disposition: form-data; name="key3"
377
378value3
379---123
380Content-Disposition: form-data; name="key4"
381
382value4
383---123
384Content-Disposition: form-data; name="upload"; filename="fake.txt"
385Content-Type: text/plain
386
387this is the content of the fake file
388
389---123--
390"""
391        environ = {
392            'CONTENT_LENGTH':   str(len(data)),
393            'CONTENT_TYPE':     'multipart/form-data; boundary=-123',
394            'QUERY_STRING':     'key1=value1&key2=value2x',
395            'REQUEST_METHOD':   'POST',
396        }
397        result = self._qs_result.copy()
398        result.update({
399            'upload': b'this is the content of the fake file\n'
400        })
401        v = gen_result(data, environ)
402        self.assertEqual(result, v)
403
404    def test_deprecated_parse_qs(self):
405        # this func is moved to urllib.parse, this is just a sanity check
406        with check_warnings(('cgi.parse_qs is deprecated, use urllib.parse.'
407                             'parse_qs instead', DeprecationWarning)):
408            self.assertEqual({'a': ['A1'], 'B': ['B3'], 'b': ['B2']},
409                             cgi.parse_qs('a=A1&b=B2&B=B3'))
410
411    def test_deprecated_parse_qsl(self):
412        # this func is moved to urllib.parse, this is just a sanity check
413        with check_warnings(('cgi.parse_qsl is deprecated, use urllib.parse.'
414                             'parse_qsl instead', DeprecationWarning)):
415            self.assertEqual([('a', 'A1'), ('b', 'B2'), ('B', 'B3')],
416                             cgi.parse_qsl('a=A1&b=B2&B=B3'))
417
418    def test_parse_header(self):
419        self.assertEqual(
420            cgi.parse_header("text/plain"),
421            ("text/plain", {}))
422        self.assertEqual(
423            cgi.parse_header("text/vnd.just.made.this.up ; "),
424            ("text/vnd.just.made.this.up", {}))
425        self.assertEqual(
426            cgi.parse_header("text/plain;charset=us-ascii"),
427            ("text/plain", {"charset": "us-ascii"}))
428        self.assertEqual(
429            cgi.parse_header('text/plain ; charset="us-ascii"'),
430            ("text/plain", {"charset": "us-ascii"}))
431        self.assertEqual(
432            cgi.parse_header('text/plain ; charset="us-ascii"; another=opt'),
433            ("text/plain", {"charset": "us-ascii", "another": "opt"}))
434        self.assertEqual(
435            cgi.parse_header('attachment; filename="silly.txt"'),
436            ("attachment", {"filename": "silly.txt"}))
437        self.assertEqual(
438            cgi.parse_header('attachment; filename="strange;name"'),
439            ("attachment", {"filename": "strange;name"}))
440        self.assertEqual(
441            cgi.parse_header('attachment; filename="strange;name";size=123;'),
442            ("attachment", {"filename": "strange;name", "size": "123"}))
443        self.assertEqual(
444            cgi.parse_header('form-data; name="files"; filename="fo\\"o;bar"'),
445            ("form-data", {"name": "files", "filename": 'fo"o;bar'}))
446
447
448BOUNDARY = "---------------------------721837373350705526688164684"
449
450POSTDATA = """-----------------------------721837373350705526688164684
451Content-Disposition: form-data; name="id"
452
4531234
454-----------------------------721837373350705526688164684
455Content-Disposition: form-data; name="title"
456
457
458-----------------------------721837373350705526688164684
459Content-Disposition: form-data; name="file"; filename="test.txt"
460Content-Type: text/plain
461
462Testing 123.
463
464-----------------------------721837373350705526688164684
465Content-Disposition: form-data; name="submit"
466
467 Add\x20
468-----------------------------721837373350705526688164684--
469"""
470
471POSTDATA_NON_ASCII = """-----------------------------721837373350705526688164684
472Content-Disposition: form-data; name="id"
473
474\xe7\xf1\x80
475-----------------------------721837373350705526688164684
476"""
477
478# http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4
479BOUNDARY_W3 = "AaB03x"
480POSTDATA_W3 = """--AaB03x
481Content-Disposition: form-data; name="submit-name"
482
483Larry
484--AaB03x
485Content-Disposition: form-data; name="files"
486Content-Type: multipart/mixed; boundary=BbC04y
487
488--BbC04y
489Content-Disposition: file; filename="file1.txt"
490Content-Type: text/plain
491
492... contents of file1.txt ...
493--BbC04y
494Content-Disposition: file; filename="file2.gif"
495Content-Type: image/gif
496Content-Transfer-Encoding: binary
497
498...contents of file2.gif...
499--BbC04y--
500--AaB03x--
501"""
502
503
504def test_main():
505    run_unittest(CgiTests)
506
507if __name__ == '__main__':
508    test_main()
509