1from test.support import check_warnings
2import cgi
3import os
4import sys
5import tempfile
6import unittest
7import warnings
8from collections import namedtuple
9from io import StringIO, BytesIO
10from test import support
11
class HackedSysModule:
    # Minimal stand-in for the ``sys`` module, installed as ``cgi.sys`` below.
    # The regression test will have real values in sys.argv, which
    # will completely confuse the test of the cgi module, so argv is
    # replaced by an empty list; stdin is passed through from the real sys.
    argv = []
    stdin = sys.stdin

# Make the cgi module see the stand-in instead of the real sys module.
cgi.sys = HackedSysModule()
19
class ComparableException:
    """Wrap an exception so it can be compared with another exception.

    The wrapper equals an exception when both have the same class and the
    same ``args``.  Attributes the wrapper does not define itself are
    looked up on the wrapped exception.
    """

    # Defining __eq__ already makes instances unhashable; say so explicitly.
    __hash__ = None

    def __init__(self, err):
        self.err = err

    def __str__(self):
        return str(self.err)

    def __repr__(self):
        # Show the wrapped exception so failed assertions are readable.
        return "%s(%r)" % (type(self).__name__, self.err)

    def __eq__(self, anExc):
        if not isinstance(anExc, Exception):
            return NotImplemented
        return (self.err.__class__ == anExc.__class__ and
                self.err.args == anExc.args)

    def __getattr__(self, attr):
        # Only reached when normal lookup fails.  Read ``err`` straight from
        # the instance dict: going through ``self.err`` would re-enter
        # __getattr__ without end on an instance whose ``err`` was never set.
        try:
            err = self.__dict__['err']
        except KeyError:
            raise AttributeError(attr) from None
        return getattr(err, attr)
35
def do_test(buf, method):
    """Run cgi.parse() in strict mode on *buf* as a GET or POST request.

    Returns whatever cgi.parse() returns; if it raises, the exception is
    returned wrapped in a ComparableException instead of propagating.
    Raises ValueError for any *method* other than "GET" or "POST".
    """
    if method == "GET":
        # The query travels in the environment; no request body is passed.
        stream = None
        environ = {'REQUEST_METHOD': 'GET', 'QUERY_STRING': buf}
    elif method == "POST":
        # The query travels as a latin-1 encoded request body.
        stream = BytesIO(buf.encode('latin-1'))
        environ = {
            'REQUEST_METHOD': 'POST',
            'CONTENT_TYPE': 'application/x-www-form-urlencoded',
            'CONTENT_LENGTH': str(len(buf)),
        }
    else:
        raise ValueError("unknown method: %s" % method)

    try:
        parsed = cgi.parse(stream, environ, strict_parsing=1)
    except Exception as exc:
        return ComparableException(exc)
    return parsed
53
# Table of (query string, expected result) pairs consumed by test_strict.
# The expected result is either the dict that parsing should produce or a
# ValueError carrying the message that strict parsing should raise.
parse_strict_test_cases = [
    ("", ValueError("bad query field: ''")),
    ("&", ValueError("bad query field: ''")),
    ("&&", ValueError("bad query field: ''")),
    (";", ValueError("bad query field: ''")),
    (";&;", ValueError("bad query field: ''")),
    # Should the next few really be valid?
    ("=", {}),
    ("=&=", {}),
    ("=;=", {}),
    # The rest seem to make sense
    ("=a", {'': ['a']}),
    ("&=a", ValueError("bad query field: ''")),
    ("=a&", ValueError("bad query field: ''")),
    ("=&a", ValueError("bad query field: 'a'")),
    ("b=a", {'b': ['a']}),
    ("b+=a", {'b ': ['a']}),
    ("a=b=a", {'a': ['b=a']}),
    ("a=+b=a", {'a': [' b=a']}),
    ("&b=a", ValueError("bad query field: ''")),
    ("b&=a", ValueError("bad query field: 'b'")),
    ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
    ("a=a+b&a=b+a", {'a': ['a b', 'b a']}),
    ("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
    ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
    ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
    ("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env",
     {'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'],
      'cuyer': ['r'],
      'expire': ['964546263'],
      'kid': ['130003.300038'],
      'lobale': ['en-US'],
      'order_id': ['0bb2e248638833d48cb7fed300000f1b'],
      'ss': ['env'],
      'view': ['bustomer'],
      }),

    ("group_id=5470&set=custom&_assigned_to=31392&_status=1&_category=100&SUBMIT=Browse",
     {'SUBMIT': ['Browse'],
      '_assigned_to': ['31392'],
      '_category': ['100'],
      '_status': ['1'],
      'group_id': ['5470'],
      'set': ['custom'],
      })
    ]
100
def norm(seq):
    """Return the items of *seq* as a new list ordered by their repr()."""
    ordered = list(seq)
    ordered.sort(key=repr)
    return ordered
103
def first_elts(list):
    """Return a new list holding element 0 of every item in *list*."""
    heads = []
    for item in list:
        heads.append(item[0])
    return heads
106
def first_second_elts(list):
    """Return an (item[0], item[1][0]) pair for every item in *list*."""
    pairs = []
    for item in list:
        pairs.append((item[0], item[1][0]))
    return pairs
109
def gen_result(data, environ):
    """Parse *data* with cgi.FieldStorage and return a plain dict of values.

    *data* is a str request body; it is encoded as latin-1 and handed to
    FieldStorage together with *environ*.  In the returned dict, a field
    that the form reports as a list maps to ``form.getlist(name)``; any
    other field maps to its ``.value``.
    """
    encoding = 'latin-1'
    fake_stdin = BytesIO(data.encode(encoding))
    fake_stdin.seek(0)
    form = cgi.FieldStorage(fp=fake_stdin, environ=environ, encoding=encoding)

    result = {}
    for k, v in dict(form).items():
        # A real conditional expression: the ``and ... or ...`` spelling
        # would fall through to ``v.value`` on a list whenever the
        # getlist() result happened to be falsy.
        result[k] = form.getlist(k) if isinstance(v, list) else v.value

    return result
121
class CgiTests(unittest.TestCase):

    # Attributes FieldStorage is expected to report, part by part, for the
    # module-level POSTDATA body.
    _postdata_fields = [
        {'name': 'id', 'filename': None, 'value': '1234'},
        {'name': 'title', 'filename': None, 'value': ''},
        {'name': 'file', 'filename': 'test.txt', 'value': b'Testing 123.\n'},
        {'name': 'submit', 'filename': None, 'value': ' Add '},
    ]

    def _check_fields(self, fields, expected):
        # Check each parsed field, in order, against a dict mapping
        # attribute names to the values that field should carry.
        self.assertEqual(len(fields), len(expected))
        for field, attrs in zip(fields, expected):
            for attr, exp in attrs.items():
                self.assertEqual(getattr(field, attr), exp)

    def test_parse_multipart(self):
        fp = BytesIO(POSTDATA.encode('latin1'))
        env = {'boundary': BOUNDARY.encode('latin1'),
               'CONTENT-LENGTH': '558'}
        result = cgi.parse_multipart(fp, env)
        expected = {'submit': [b' Add '], 'id': [b'1234'],
                    'file': [b'Testing 123.\n'], 'title': [b'']}
        self.assertEqual(result, expected)

    def test_fieldstorage_properties(self):
        fs = cgi.FieldStorage()
        self.assertFalse(fs)
        self.assertIn("FieldStorage", repr(fs))
        self.assertEqual(list(fs), list(fs.keys()))
        fs.list.append(namedtuple('MockFieldStorage', 'name')('fieldvalue'))
        self.assertTrue(fs)

    def test_fieldstorage_invalid(self):
        self.assertRaises(TypeError, cgi.FieldStorage, "not-a-file-obj",
                          environ={"REQUEST_METHOD":"PUT"})
        self.assertRaises(TypeError, cgi.FieldStorage, "foo", "bar")
        fs = cgi.FieldStorage(headers={'content-type':'text/plain'})
        self.assertRaises(TypeError, bool, fs)

    def test_escape(self):
        # cgi.escape() is deprecated.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', r'cgi\.escape',
                                     DeprecationWarning)
            text = "test string"
            # '&' has to come back as an entity, not unchanged.
            self.assertEqual("test &amp; string", cgi.escape("test & string"))
            self.assertEqual("&lt;%s&gt;" % text, cgi.escape("<%s>" % text))
            self.assertEqual("&quot;%s&quot;" % text,
                             cgi.escape('"%s"' % text, True))

    def test_strict(self):
        for orig, expect in parse_strict_test_cases:
            # One sub-test per query string, so a failing case is reported
            # by name and does not hide the cases after it.
            with self.subTest(query=orig):
                # Test basic parsing
                d = do_test(orig, "GET")
                self.assertEqual(d, expect, "Error parsing %s method GET" % repr(orig))
                d = do_test(orig, "POST")
                self.assertEqual(d, expect, "Error parsing %s method POST" % repr(orig))

                env = {'QUERY_STRING': orig}
                fs = cgi.FieldStorage(environ=env)
                if isinstance(expect, dict):
                    # test dict interface
                    self.assertEqual(len(expect), len(fs))
                    self.assertCountEqual(expect.keys(), fs.keys())
                    ##self.assertEqual(norm(expect.values()), norm(fs.values()))
                    ##self.assertEqual(norm(expect.items()), norm(fs.items()))
                    self.assertEqual(fs.getvalue("nonexistent field", "default"), "default")
                    # test individual fields
                    for key, expect_val in expect.items():
                        self.assertIn(key, fs)
                        if len(expect_val) > 1:
                            self.assertEqual(fs.getvalue(key), expect_val)
                        else:
                            self.assertEqual(fs.getvalue(key), expect_val[0])

    def test_log(self):
        # Register the cleanup up front so the log state this test sets on
        # the cgi module is also undone when os.devnull is missing or an
        # assertion fails part-way through.
        self.addCleanup(cgi.closelog)
        cgi.log("Testing")

        cgi.logfp = StringIO()
        cgi.initlog("%s", "Testing initlog 1")
        cgi.log("%s", "Testing log 2")
        self.assertEqual(cgi.logfp.getvalue(), "Testing initlog 1\nTesting log 2\n")
        if os.path.exists(os.devnull):
            cgi.logfp = None
            cgi.logfile = os.devnull
            cgi.initlog("%s", "Testing log 3")
            cgi.log("Testing log 4")

    def test_fieldstorage_readline(self):
        # FieldStorage uses readline, which has the capacity to read all
        # contents of the input file into memory; we use readline's size argument
        # to prevent that for files that do not contain any newlines in
        # non-GET/HEAD requests
        class TestReadlineFile:
            # File wrapper that counts readline() calls and forwards
            # everything else to the wrapped file.
            def __init__(self, file):
                self.file = file
                self.numcalls = 0

            def readline(self, size=None):
                self.numcalls += 1
                if size:
                    return self.file.readline(size)
                else:
                    return self.file.readline()

            def __getattr__(self, name):
                file = self.__dict__['file']
                a = getattr(file, name)
                # Cache everything except int-valued attributes on the
                # wrapper so later lookups bypass __getattr__.
                if not isinstance(a, int):
                    setattr(self, name, a)
                return a

        f = TestReadlineFile(tempfile.TemporaryFile("wb+"))
        self.addCleanup(f.close)
        f.write(b'x' * 256 * 1024)
        f.seek(0)
        env = {'REQUEST_METHOD':'PUT'}
        fs = cgi.FieldStorage(fp=f, environ=env)
        self.addCleanup(fs.file.close)
        # if we're not chunking properly, readline is only called twice
        # (by read_binary); if we are chunking properly, it will be called 5 times
        # as long as the chunksize is 1 << 16.
        self.assertGreater(f.numcalls, 2)
        f.close()

    def test_fieldstorage_multipart(self):
        # Test basic FieldStorage multipart parsing
        env = {
            'REQUEST_METHOD': 'POST',
            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
            'CONTENT_LENGTH': '558'}
        fp = BytesIO(POSTDATA.encode('latin-1'))
        fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
        self.assertEqual(len(fs.list), 4)
        self._check_fields(fs.list, self._postdata_fields)

    def test_fieldstorage_multipart_leading_whitespace(self):
        env = {
            'REQUEST_METHOD': 'POST',
            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
            'CONTENT_LENGTH': '560'}
        # Add some leading whitespace to our post data that will cause the
        # first line to not be the innerboundary.
        fp = BytesIO(b"\r\n" + POSTDATA.encode('latin-1'))
        fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
        self.assertEqual(len(fs.list), 4)
        self._check_fields(fs.list, self._postdata_fields)

    def test_fieldstorage_multipart_non_ascii(self):
        # Test FieldStorage multipart parsing of a non-ASCII value, with
        # the body encoded as iso-8859-1 and as utf-8 in turn.
        env = {'REQUEST_METHOD':'POST',
            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
            'CONTENT_LENGTH':'558'}
        expect = [{'name':'id', 'filename':None, 'value':'\xe7\xf1\x80'}]
        for encoding in ['iso-8859-1','utf-8']:
            fp = BytesIO(POSTDATA_NON_ASCII.encode(encoding))
            fs = cgi.FieldStorage(fp, environ=env,encoding=encoding)
            self.assertEqual(len(fs.list), 1)
            self._check_fields(fs.list, expect)

    def test_fieldstorage_multipart_maxline(self):
        # Issue #18167
        maxline = 1 << 16
        self.maxDiff = None
        def check(content):
            data = """---123
Content-Disposition: form-data; name="upload"; filename="fake.txt"
Content-Type: text/plain

%s
---123--
""".replace('\n', '\r\n') % content
            environ = {
                'CONTENT_LENGTH':   str(len(data)),
                'CONTENT_TYPE':     'multipart/form-data; boundary=-123',
                'REQUEST_METHOD':   'POST',
            }
            self.assertEqual(gen_result(data, environ),
                             {'upload': content.encode('latin1')})
        check('x' * (maxline - 1))
        check('x' * (maxline - 1) + '\r')
        check('x' * (maxline - 1) + '\r' + 'y' * (maxline - 1))

    def test_fieldstorage_multipart_w3c(self):
        # Test basic FieldStorage multipart parsing (W3C sample)
        env = {
            'REQUEST_METHOD': 'POST',
            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY_W3),
            'CONTENT_LENGTH': str(len(POSTDATA_W3))}
        fp = BytesIO(POSTDATA_W3.encode('latin-1'))
        fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
        self.assertEqual(len(fs.list), 2)
        self.assertEqual(fs.list[0].name, 'submit-name')
        self.assertEqual(fs.list[0].value, 'Larry')
        self.assertEqual(fs.list[1].name, 'files')
        files = fs.list[1].value
        self.assertEqual(len(files), 2)
        expect = [{'name': None, 'filename': 'file1.txt', 'value': b'... contents of file1.txt ...'},
                  {'name': None, 'filename': 'file2.gif', 'value': b'...contents of file2.gif...'}]
        self._check_fields(files, expect)

    def test_fieldstorage_part_content_length(self):
        # A part that carries its own Content-Length header.
        BOUNDARY = "JfISa01"
        POSTDATA = """--JfISa01
Content-Disposition: form-data; name="submit-name"
Content-Length: 5

Larry
--JfISa01"""
        env = {
            'REQUEST_METHOD': 'POST',
            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
            'CONTENT_LENGTH': str(len(POSTDATA))}
        fp = BytesIO(POSTDATA.encode('latin-1'))
        fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
        self.assertEqual(len(fs.list), 1)
        self.assertEqual(fs.list[0].name, 'submit-name')
        self.assertEqual(fs.list[0].value, 'Larry')

    def test_fieldstorage_as_context_manager(self):
        fp = BytesIO(b'x' * 10)
        env = {'REQUEST_METHOD': 'PUT'}
        with cgi.FieldStorage(fp=fp, environ=env) as fs:
            content = fs.file.read()
            self.assertFalse(fs.file.closed)
        # Leaving the with-block must have closed the underlying file.
        self.assertTrue(fs.file.closed)
        self.assertEqual(content, 'x' * 10)
        with self.assertRaisesRegex(ValueError, 'I/O operation on closed file'):
            fs.file.read()

    # Result expected by the testQS* tests below, which combine fields from
    # QUERY_STRING with fields from the request body.
    _qs_result = {
        'key1': 'value1',
        'key2': ['value2x', 'value2y'],
        'key3': 'value3',
        'key4': 'value4'
    }
    def testQSAndUrlEncode(self):
        data = "key2=value2x&key3=value3&key4=value4"
        environ = {
            'CONTENT_LENGTH':   str(len(data)),
            'CONTENT_TYPE':     'application/x-www-form-urlencoded',
            'QUERY_STRING':     'key1=value1&key2=value2y',
            'REQUEST_METHOD':   'POST',
        }
        v = gen_result(data, environ)
        self.assertEqual(self._qs_result, v)

    def testQSAndFormData(self):
        data = """---123
Content-Disposition: form-data; name="key2"

value2y
---123
Content-Disposition: form-data; name="key3"

value3
---123
Content-Disposition: form-data; name="key4"

value4
---123--
"""
        environ = {
            'CONTENT_LENGTH':   str(len(data)),
            'CONTENT_TYPE':     'multipart/form-data; boundary=-123',
            'QUERY_STRING':     'key1=value1&key2=value2x',
            'REQUEST_METHOD':   'POST',
        }
        v = gen_result(data, environ)
        self.assertEqual(self._qs_result, v)

    def testQSAndFormDataFile(self):
        data = """---123
Content-Disposition: form-data; name="key2"

value2y
---123
Content-Disposition: form-data; name="key3"

value3
---123
Content-Disposition: form-data; name="key4"

value4
---123
Content-Disposition: form-data; name="upload"; filename="fake.txt"
Content-Type: text/plain

this is the content of the fake file

---123--
"""
        environ = {
            'CONTENT_LENGTH':   str(len(data)),
            'CONTENT_TYPE':     'multipart/form-data; boundary=-123',
            'QUERY_STRING':     'key1=value1&key2=value2x',
            'REQUEST_METHOD':   'POST',
        }
        result = self._qs_result.copy()
        result.update({
            'upload': b'this is the content of the fake file\n'
        })
        v = gen_result(data, environ)
        self.assertEqual(result, v)

    def test_deprecated_parse_qs(self):
        # this func is moved to urllib.parse, this is just a sanity check
        with check_warnings(('cgi.parse_qs is deprecated, use urllib.parse.'
                             'parse_qs instead', DeprecationWarning)):
            self.assertEqual({'a': ['A1'], 'B': ['B3'], 'b': ['B2']},
                             cgi.parse_qs('a=A1&b=B2&B=B3'))

    def test_deprecated_parse_qsl(self):
        # this func is moved to urllib.parse, this is just a sanity check
        with check_warnings(('cgi.parse_qsl is deprecated, use urllib.parse.'
                             'parse_qsl instead', DeprecationWarning)):
            self.assertEqual([('a', 'A1'), ('b', 'B2'), ('B', 'B3')],
                             cgi.parse_qsl('a=A1&b=B2&B=B3'))

    def test_parse_header(self):
        self.assertEqual(
            cgi.parse_header("text/plain"),
            ("text/plain", {}))
        self.assertEqual(
            cgi.parse_header("text/vnd.just.made.this.up ; "),
            ("text/vnd.just.made.this.up", {}))
        self.assertEqual(
            cgi.parse_header("text/plain;charset=us-ascii"),
            ("text/plain", {"charset": "us-ascii"}))
        self.assertEqual(
            cgi.parse_header('text/plain ; charset="us-ascii"'),
            ("text/plain", {"charset": "us-ascii"}))
        self.assertEqual(
            cgi.parse_header('text/plain ; charset="us-ascii"; another=opt'),
            ("text/plain", {"charset": "us-ascii", "another": "opt"}))
        self.assertEqual(
            cgi.parse_header('attachment; filename="silly.txt"'),
            ("attachment", {"filename": "silly.txt"}))
        self.assertEqual(
            cgi.parse_header('attachment; filename="strange;name"'),
            ("attachment", {"filename": "strange;name"}))
        self.assertEqual(
            cgi.parse_header('attachment; filename="strange;name";size=123;'),
            ("attachment", {"filename": "strange;name", "size": "123"}))
        self.assertEqual(
            cgi.parse_header('form-data; name="files"; filename="fo\\"o;bar"'),
            ("form-data", {"name": "files", "filename": 'fo"o;bar'}))

    def test_all(self):
        # Names the cgi module defines but that are excluded from the
        # __all__ check.
        blacklist = {"logfile", "logfp", "initlog", "dolog", "nolog",
                     "closelog", "log", "maxlen", "valid_boundary"}
        support.check__all__(self, cgi, blacklist=blacklist)
481
482
# Boundary token the multipart tests pass in CONTENT_TYPE when parsing
# POSTDATA and POSTDATA_NON_ASCII.
BOUNDARY = "---------------------------721837373350705526688164684"

# Multipart form body with four parts: "id", "title", "file" (uploaded as
# test.txt) and "submit".  The trailing space of the "submit" value is
# spelled \x20 so that it stays visible in the source.
POSTDATA = """-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="id"

1234
-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="title"


-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="file"; filename="test.txt"
Content-Type: text/plain

Testing 123.

-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="submit"

 Add\x20
-----------------------------721837373350705526688164684--
"""

# Single-part body whose "id" value consists of non-ASCII characters.
POSTDATA_NON_ASCII = """-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="id"

\xe7\xf1\x80
-----------------------------721837373350705526688164684
"""

# http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4
# Nested sample: the "files" part is itself multipart/mixed and uses its
# own inner boundary.
BOUNDARY_W3 = "AaB03x"
POSTDATA_W3 = """--AaB03x
Content-Disposition: form-data; name="submit-name"

Larry
--AaB03x
Content-Disposition: form-data; name="files"
Content-Type: multipart/mixed; boundary=BbC04y

--BbC04y
Content-Disposition: file; filename="file1.txt"
Content-Type: text/plain

... contents of file1.txt ...
--BbC04y
Content-Disposition: file; filename="file2.gif"
Content-Type: image/gif
Content-Transfer-Encoding: binary

...contents of file2.gif...
--BbC04y--
--AaB03x--
"""
537
# Run the test suite when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
540