# test_cgi.py revision d90f8d10e088657593fa753ecacab95845d378aa
"""Regression tests for the cgi module."""

from test.support import run_unittest, check_warnings
import cgi
import os
import sys
import tempfile
import unittest
import warnings
from collections import namedtuple
from io import StringIO, BytesIO


class HackedSysModule:
    # The regression test will have real values in sys.argv, which
    # will completely confuse the test of the cgi module
    argv = []
    stdin = sys.stdin

cgi.sys = HackedSysModule()


class ComparableException:
    """Wrap an exception so it can be compared with an expected one.

    Two exceptions are considered equal when they have the same class and
    the same ``args``.  Any other attribute access is forwarded to the
    wrapped exception.
    """

    def __init__(self, err):
        self.err = err

    def __str__(self):
        return str(self.err)

    def __eq__(self, anExc):
        if not isinstance(anExc, Exception):
            return NotImplemented
        return (self.err.__class__ == anExc.__class__ and
                self.err.args == anExc.args)

    def __getattr__(self, attr):
        return getattr(self.err, attr)


def do_test(buf, method):
    """Parse *buf* with cgi.parse() as a GET query string or a POST body.

    Returns whatever cgi.parse() returns, or a ComparableException wrapping
    the exception it raised.  Raises ValueError for any *method* other than
    "GET" or "POST".
    """
    env = {}
    if method == "GET":
        fp = None
        env['REQUEST_METHOD'] = 'GET'
        env['QUERY_STRING'] = buf
    elif method == "POST":
        fp = BytesIO(buf.encode('latin-1'))  # FieldStorage expects bytes
        env['REQUEST_METHOD'] = 'POST'
        env['CONTENT_TYPE'] = 'application/x-www-form-urlencoded'
        env['CONTENT_LENGTH'] = str(len(buf))
    else:
        raise ValueError("unknown method: %s" % method)
    try:
        return cgi.parse(fp, env, strict_parsing=1)
    except Exception as err:
        # Deliberately broad: the caller compares the captured exception
        # against the expected one instead of letting it propagate.
        return ComparableException(err)


# Pairs of (query string, expected result).  The expected result is either
# the parsed dictionary or the exception that strict parsing should raise.
parse_strict_test_cases = [
    ("", ValueError("bad query field: ''")),
    ("&", ValueError("bad query field: ''")),
    ("&&", ValueError("bad query field: ''")),
    (";", ValueError("bad query field: ''")),
    (";&;", ValueError("bad query field: ''")),
    # Should the next few really be valid?
    ("=", {}),
    ("=&=", {}),
    ("=;=", {}),
    # This rest seem to make sense
    ("=a", {'': ['a']}),
    ("&=a", ValueError("bad query field: ''")),
    ("=a&", ValueError("bad query field: ''")),
    ("=&a", ValueError("bad query field: 'a'")),
    ("b=a", {'b': ['a']}),
    ("b+=a", {'b ': ['a']}),
    ("a=b=a", {'a': ['b=a']}),
    ("a=+b=a", {'a': [' b=a']}),
    ("&b=a", ValueError("bad query field: ''")),
    ("b&=a", ValueError("bad query field: 'b'")),
    ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
    ("a=a+b&a=b+a", {'a': ['a b', 'b a']}),
    ("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
    ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
    ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
    ("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env",
     {'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'],
      'cuyer': ['r'],
      'expire': ['964546263'],
      'kid': ['130003.300038'],
      'lobale': ['en-US'],
      'order_id': ['0bb2e248638833d48cb7fed300000f1b'],
      'ss': ['env'],
      'view': ['bustomer'],
      }),

    ("group_id=5470&set=custom&_assigned_to=31392&_status=1&_category=100&SUBMIT=Browse",
     {'SUBMIT': ['Browse'],
      '_assigned_to': ['31392'],
      '_category': ['100'],
      '_status': ['1'],
      'group_id': ['5470'],
      'set': ['custom'],
      })
    ]


def norm(seq):
    """Return the items of *seq* as a list sorted by their repr()."""
    return sorted(seq, key=repr)


def first_elts(list):
    """Return the first element of every item in *list*."""
    return [p[0] for p in list]


def first_second_elts(list):
    """Return (p[0], p[1][0]) for every item p in *list*."""
    return [(p[0], p[1][0]) for p in list]


def gen_result(data, environ):
    """Feed *data* to cgi.FieldStorage and return its fields as a dict.

    *data* is encoded as latin-1 and used as the request body; *environ* is
    passed through as the CGI environment.  A key whose entry is a list maps
    to form.getlist(key); any other key maps to the entry's ``value``.
    """
    encoding = 'latin-1'
    fake_stdin = BytesIO(data.encode(encoding))
    fake_stdin.seek(0)
    form = cgi.FieldStorage(fp=fake_stdin, environ=environ, encoding=encoding)

    return {
        k: form.getlist(k) if isinstance(v, list) else v.value
        for k, v in dict(form).items()
    }


class CgiTests(unittest.TestCase):

    def _check_fields(self, fields, expect):
        # Compare every parsed field with the attribute values expected for
        # it.  Callers assert the number of fields before calling this.
        for field, attrs in zip(fields, expect):
            for k, exp in attrs.items():
                got = getattr(field, k)
                self.assertEqual(got, exp)

    def test_parse_multipart(self):
        fp = BytesIO(POSTDATA.encode('latin1'))
        env = {'boundary': BOUNDARY.encode('latin1'),
               'CONTENT-LENGTH': '558'}
        result = cgi.parse_multipart(fp, env)
        expected = {'submit': [b' Add '], 'id': [b'1234'],
                    'file': [b'Testing 123.\n'], 'title': [b'']}
        self.assertEqual(result, expected)

    def test_fieldstorage_properties(self):
        fs = cgi.FieldStorage()
        self.assertFalse(fs)
        self.assertIn("FieldStorage", repr(fs))
        self.assertEqual(list(fs), list(fs.keys()))
        fs.list.append(namedtuple('MockFieldStorage', 'name')('fieldvalue'))
        self.assertTrue(fs)

    def test_fieldstorage_invalid(self):
        self.assertRaises(TypeError, cgi.FieldStorage, "not-a-file-obj",
                          environ={"REQUEST_METHOD":"PUT"})
        self.assertRaises(TypeError, cgi.FieldStorage, "foo", "bar")
        fs = cgi.FieldStorage(headers={'content-type':'text/plain'})
        self.assertRaises(TypeError, bool, fs)

    def test_escape(self):
        # cgi.escape() is deprecated.
        with warnings.catch_warnings():
            # The message argument is a regular expression, hence the raw
            # string and the escaped dot.
            warnings.filterwarnings('ignore', r'cgi\.escape',
                                    DeprecationWarning)
            # '&', '<' and '>' are always replaced by HTML entities; the
            # double quote only when the second argument is true.
            self.assertEqual("test &amp; string", cgi.escape("test & string"))
            self.assertEqual("&lt;test string&gt;", cgi.escape("<test string>"))
            self.assertEqual("&quot;test string&quot;", cgi.escape('"test string"', True))

    def test_strict(self):
        for orig, expect in parse_strict_test_cases:
            # Test basic parsing
            d = do_test(orig, "GET")
            self.assertEqual(d, expect, "Error parsing %s method GET" % repr(orig))
            d = do_test(orig, "POST")
            self.assertEqual(d, expect, "Error parsing %s method POST" % repr(orig))

            env = {'QUERY_STRING': orig}
            fs = cgi.FieldStorage(environ=env)
            if isinstance(expect, dict):
                # test dict interface
                self.assertEqual(len(expect), len(fs))
                self.assertCountEqual(expect.keys(), fs.keys())
                ##self.assertEqual(norm(expect.values()), norm(fs.values()))
                ##self.assertEqual(norm(expect.items()), norm(fs.items()))
                self.assertEqual(fs.getvalue("nonexistent field", "default"), "default")
                # test individual fields
                for key, expect_val in expect.items():
                    self.assertIn(key, fs)
                    if len(expect_val) > 1:
                        self.assertEqual(fs.getvalue(key), expect_val)
                    else:
                        self.assertEqual(fs.getvalue(key), expect_val[0])

    def test_log(self):
        cgi.log("Testing")

        cgi.logfp = StringIO()
        cgi.initlog("%s", "Testing initlog 1")
        cgi.log("%s", "Testing log 2")
        self.assertEqual(cgi.logfp.getvalue(), "Testing initlog 1\nTesting log 2\n")
        if os.path.exists(os.devnull):
            cgi.logfp = None
            cgi.logfile = os.devnull
            cgi.initlog("%s", "Testing log 3")
            self.addCleanup(cgi.closelog)
            cgi.log("Testing log 4")

    def test_fieldstorage_readline(self):
        # FieldStorage uses readline, which has the capacity to read all
        # contents of the input file into memory; we use readline's size argument
        # to prevent that for files that do not contain any newlines in
        # non-GET/HEAD requests
        class TestReadlineFile:
            def __init__(self, file):
                self.file = file
                self.numcalls = 0

            def readline(self, size=None):
                self.numcalls += 1
                if size:
                    return self.file.readline(size)
                else:
                    return self.file.readline()

            def __getattr__(self, name):
                # Forward everything else to the wrapped file, caching the
                # looked-up attribute on the wrapper unless it is an int.
                file = self.__dict__['file']
                a = getattr(file, name)
                if not isinstance(a, int):
                    setattr(self, name, a)
                return a

        f = TestReadlineFile(tempfile.TemporaryFile("wb+"))
        self.addCleanup(f.close)
        f.write(b'x' * 256 * 1024)
        f.seek(0)
        env = {'REQUEST_METHOD':'PUT'}
        fs = cgi.FieldStorage(fp=f, environ=env)
        self.addCleanup(fs.file.close)
        # if we're not chunking properly, readline is only called twice
        # (by read_binary); if we are chunking properly, it will be called 5 times
        # as long as the chunksize is 1 << 16.
        self.assertGreater(f.numcalls, 2)
        f.close()

    def test_fieldstorage_multipart(self):
        #Test basic FieldStorage multipart parsing
        env = {
            'REQUEST_METHOD': 'POST',
            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
            'CONTENT_LENGTH': '558'}
        fp = BytesIO(POSTDATA.encode('latin-1'))
        fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
        self.assertEqual(len(fs.list), 4)
        expect = [{'name':'id', 'filename':None, 'value':'1234'},
                  {'name':'title', 'filename':None, 'value':''},
                  {'name':'file', 'filename':'test.txt', 'value':b'Testing 123.\n'},
                  {'name':'submit', 'filename':None, 'value':' Add '}]
        self._check_fields(fs.list, expect)

    def test_fieldstorage_multipart_leading_whitespace(self):
        env = {
            'REQUEST_METHOD': 'POST',
            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
            'CONTENT_LENGTH': '560'}
        # Add some leading whitespace to our post data that will cause the
        # first line to not be the innerboundary.
        fp = BytesIO(b"\r\n" + POSTDATA.encode('latin-1'))
        fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
        self.assertEqual(len(fs.list), 4)
        expect = [{'name':'id', 'filename':None, 'value':'1234'},
                  {'name':'title', 'filename':None, 'value':''},
                  {'name':'file', 'filename':'test.txt', 'value':b'Testing 123.\n'},
                  {'name':'submit', 'filename':None, 'value':' Add '}]
        self._check_fields(fs.list, expect)

    def test_fieldstorage_multipart_non_ascii(self):
        #Test basic FieldStorage multipart parsing
        env = {'REQUEST_METHOD':'POST',
               'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY),
               'CONTENT_LENGTH':'558'}
        for encoding in ['iso-8859-1','utf-8']:
            fp = BytesIO(POSTDATA_NON_ASCII.encode(encoding))
            fs = cgi.FieldStorage(fp, environ=env,encoding=encoding)
            self.assertEqual(len(fs.list), 1)
            expect = [{'name':'id', 'filename':None, 'value':'\xe7\xf1\x80'}]
            self._check_fields(fs.list, expect)

    def test_fieldstorage_multipart_maxline(self):
        # Issue #18167
        maxline = 1 << 16
        self.maxDiff = None
        def check(content):
            data = """---123
Content-Disposition: form-data; name="upload"; filename="fake.txt"
Content-Type: text/plain

%s
---123--
""".replace('\n', '\r\n') % content
            environ = {
                'CONTENT_LENGTH':   str(len(data)),
                'CONTENT_TYPE':     'multipart/form-data; boundary=-123',
                'REQUEST_METHOD':   'POST',
            }
            self.assertEqual(gen_result(data, environ),
                             {'upload': content.encode('latin1')})
        check('x' * (maxline - 1))
        check('x' * (maxline - 1) + '\r')
        check('x' * (maxline - 1) + '\r' + 'y' * (maxline - 1))

    def test_fieldstorage_multipart_w3c(self):
        # Test basic FieldStorage multipart parsing (W3C sample)
        env = {
            'REQUEST_METHOD': 'POST',
            'CONTENT_TYPE': 'multipart/form-data; boundary={}'.format(BOUNDARY_W3),
            'CONTENT_LENGTH': str(len(POSTDATA_W3))}
        fp = BytesIO(POSTDATA_W3.encode('latin-1'))
        fs = cgi.FieldStorage(fp, environ=env, encoding="latin-1")
        self.assertEqual(len(fs.list), 2)
        self.assertEqual(fs.list[0].name, 'submit-name')
        self.assertEqual(fs.list[0].value, 'Larry')
        self.assertEqual(fs.list[1].name, 'files')
        files = fs.list[1].value
        self.assertEqual(len(files), 2)
        expect = [{'name': None, 'filename': 'file1.txt', 'value': b'... contents of file1.txt ...'},
                  {'name': None, 'filename': 'file2.gif', 'value': b'...contents of file2.gif...'}]
        self._check_fields(files, expect)

    # Expected result shared by the three testQSAnd* tests below.
    _qs_result = {
        'key1': 'value1',
        'key2': ['value2x', 'value2y'],
        'key3': 'value3',
        'key4': 'value4'
    }

    def testQSAndUrlEncode(self):
        data = "key2=value2x&key3=value3&key4=value4"
        environ = {
            'CONTENT_LENGTH':   str(len(data)),
            'CONTENT_TYPE':     'application/x-www-form-urlencoded',
            'QUERY_STRING':     'key1=value1&key2=value2y',
            'REQUEST_METHOD':   'POST',
        }
        v = gen_result(data, environ)
        self.assertEqual(self._qs_result, v)

    def testQSAndFormData(self):
        data = """---123
Content-Disposition: form-data; name="key2"

value2y
---123
Content-Disposition: form-data; name="key3"

value3
---123
Content-Disposition: form-data; name="key4"

value4
---123--
"""
        environ = {
            'CONTENT_LENGTH':   str(len(data)),
            'CONTENT_TYPE':     'multipart/form-data; boundary=-123',
            'QUERY_STRING':     'key1=value1&key2=value2x',
            'REQUEST_METHOD':   'POST',
        }
        v = gen_result(data, environ)
        self.assertEqual(self._qs_result, v)

    def testQSAndFormDataFile(self):
        data = """---123
Content-Disposition: form-data; name="key2"

value2y
---123
Content-Disposition: form-data; name="key3"

value3
---123
Content-Disposition: form-data; name="key4"

value4
---123
Content-Disposition: form-data; name="upload"; filename="fake.txt"
Content-Type: text/plain

this is the content of the fake file

---123--
"""
        environ = {
            'CONTENT_LENGTH':   str(len(data)),
            'CONTENT_TYPE':     'multipart/form-data; boundary=-123',
            'QUERY_STRING':     'key1=value1&key2=value2x',
            'REQUEST_METHOD':   'POST',
        }
        result = self._qs_result.copy()
        result.update({
            'upload': b'this is the content of the fake file\n'
        })
        v = gen_result(data, environ)
        self.assertEqual(result, v)

    def test_deprecated_parse_qs(self):
        # this func is moved to urllib.parse, this is just a sanity check
        with check_warnings(('cgi.parse_qs is deprecated, use urllib.parse.'
                             'parse_qs instead', DeprecationWarning)):
            self.assertEqual({'a': ['A1'], 'B': ['B3'], 'b': ['B2']},
                             cgi.parse_qs('a=A1&b=B2&B=B3'))

    def test_deprecated_parse_qsl(self):
        # this func is moved to urllib.parse, this is just a sanity check
        with check_warnings(('cgi.parse_qsl is deprecated, use urllib.parse.'
                             'parse_qsl instead', DeprecationWarning)):
            self.assertEqual([('a', 'A1'), ('b', 'B2'), ('B', 'B3')],
                             cgi.parse_qsl('a=A1&b=B2&B=B3'))

    def test_parse_header(self):
        self.assertEqual(
            cgi.parse_header("text/plain"),
            ("text/plain", {}))
        self.assertEqual(
            cgi.parse_header("text/vnd.just.made.this.up ; "),
            ("text/vnd.just.made.this.up", {}))
        self.assertEqual(
            cgi.parse_header("text/plain;charset=us-ascii"),
            ("text/plain", {"charset": "us-ascii"}))
        self.assertEqual(
            cgi.parse_header('text/plain ; charset="us-ascii"'),
            ("text/plain", {"charset": "us-ascii"}))
        self.assertEqual(
            cgi.parse_header('text/plain ; charset="us-ascii"; another=opt'),
            ("text/plain", {"charset": "us-ascii", "another": "opt"}))
        self.assertEqual(
            cgi.parse_header('attachment; filename="silly.txt"'),
            ("attachment", {"filename": "silly.txt"}))
        self.assertEqual(
            cgi.parse_header('attachment; filename="strange;name"'),
            ("attachment", {"filename": "strange;name"}))
        self.assertEqual(
            cgi.parse_header('attachment; filename="strange;name";size=123;'),
            ("attachment", {"filename": "strange;name", "size": "123"}))
        self.assertEqual(
            cgi.parse_header('form-data; name="files"; filename="fo\\"o;bar"'),
            ("form-data", {"name": "files", "filename": 'fo"o;bar'}))


BOUNDARY = "---------------------------721837373350705526688164684"

POSTDATA = """-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="id"

1234
-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="title"


-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="file"; filename="test.txt"
Content-Type: text/plain

Testing 123.

-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="submit"

 Add\x20
-----------------------------721837373350705526688164684--
"""

POSTDATA_NON_ASCII = """-----------------------------721837373350705526688164684
Content-Disposition: form-data; name="id"

\xe7\xf1\x80
-----------------------------721837373350705526688164684
"""

# http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4
BOUNDARY_W3 = "AaB03x"
POSTDATA_W3 = """--AaB03x
Content-Disposition: form-data; name="submit-name"

Larry
--AaB03x
Content-Disposition: form-data; name="files"
Content-Type: multipart/mixed; boundary=BbC04y

--BbC04y
Content-Disposition: file; filename="file1.txt"
Content-Type: text/plain

... contents of file1.txt ...
--BbC04y
Content-Disposition: file; filename="file2.gif"
Content-Type: image/gif
Content-Transfer-Encoding: binary

...contents of file2.gif...
--BbC04y--
--AaB03x--
"""


def test_main():
    """Run every test in CgiTests through test.support.run_unittest()."""
    run_unittest(CgiTests)

if __name__ == '__main__':
    test_main()