# test_pyexpat.py, revision 6c8b66cd261c6418566700527784b17bb459db1f
1# XXX TypeErrors on calling handlers, or on bad return values from a 2# handler, are obscure and unhelpful. 3 4from io import BytesIO 5import os 6import sys 7import sysconfig 8import unittest 9import traceback 10 11from xml.parsers import expat 12from xml.parsers.expat import errors 13 14from test.support import sortdict 15 16 17class SetAttributeTest(unittest.TestCase): 18 def setUp(self): 19 self.parser = expat.ParserCreate(namespace_separator='!') 20 21 def test_buffer_text(self): 22 self.assertIs(self.parser.buffer_text, False) 23 for x in 0, 1, 2, 0: 24 self.parser.buffer_text = x 25 self.assertIs(self.parser.buffer_text, bool(x)) 26 27 def test_namespace_prefixes(self): 28 self.assertIs(self.parser.namespace_prefixes, False) 29 for x in 0, 1, 2, 0: 30 self.parser.namespace_prefixes = x 31 self.assertIs(self.parser.namespace_prefixes, bool(x)) 32 33 def test_ordered_attributes(self): 34 self.assertIs(self.parser.ordered_attributes, False) 35 for x in 0, 1, 2, 0: 36 self.parser.ordered_attributes = x 37 self.assertIs(self.parser.ordered_attributes, bool(x)) 38 39 def test_specified_attributes(self): 40 self.assertIs(self.parser.specified_attributes, False) 41 for x in 0, 1, 2, 0: 42 self.parser.specified_attributes = x 43 self.assertIs(self.parser.specified_attributes, bool(x)) 44 45 def test_specified_attributes(self): 46 self.assertIs(self.parser.specified_attributes, False) 47 for x in 0, 1, 2, 0: 48 self.parser.specified_attributes = x 49 self.assertIs(self.parser.specified_attributes, bool(x)) 50 51 def test_invalid_attributes(self): 52 with self.assertRaises(AttributeError): 53 self.parser.returns_unicode = 1 54 with self.assertRaises(AttributeError): 55 self.parser.returns_unicode 56 57 # Issue #25019 58 self.assertRaises(TypeError, setattr, self.parser, range(0xF), 0) 59 self.assertRaises(TypeError, self.parser.__setattr__, range(0xF), 0) 60 self.assertRaises(TypeError, getattr, self.parser, range(0xF)) 61 62 63data = b'''\ 64<?xml version="1.0" 
encoding="iso-8859-1" standalone="no"?> 65<?xml-stylesheet href="stylesheet.css"?> 66<!-- comment data --> 67<!DOCTYPE quotations SYSTEM "quotations.dtd" [ 68<!ELEMENT root ANY> 69<!ATTLIST root attr1 CDATA #REQUIRED attr2 CDATA #IMPLIED> 70<!NOTATION notation SYSTEM "notation.jpeg"> 71<!ENTITY acirc "â"> 72<!ENTITY external_entity SYSTEM "entity.file"> 73<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation> 74%unparsed_entity; 75]> 76 77<root attr1="value1" attr2="value2ὀ"> 78<myns:subelement xmlns:myns="http://www.python.org/namespace"> 79 Contents of subelements 80</myns:subelement> 81<sub2><![CDATA[contents of CDATA section]]></sub2> 82&external_entity; 83&skipped_entity; 84\xb5 85</root> 86''' 87 88 89# Produce UTF-8 output 90class ParseTest(unittest.TestCase): 91 class Outputter: 92 def __init__(self): 93 self.out = [] 94 95 def StartElementHandler(self, name, attrs): 96 self.out.append('Start element: ' + repr(name) + ' ' + 97 sortdict(attrs)) 98 99 def EndElementHandler(self, name): 100 self.out.append('End element: ' + repr(name)) 101 102 def CharacterDataHandler(self, data): 103 data = data.strip() 104 if data: 105 self.out.append('Character data: ' + repr(data)) 106 107 def ProcessingInstructionHandler(self, target, data): 108 self.out.append('PI: ' + repr(target) + ' ' + repr(data)) 109 110 def StartNamespaceDeclHandler(self, prefix, uri): 111 self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri)) 112 113 def EndNamespaceDeclHandler(self, prefix): 114 self.out.append('End of NS decl: ' + repr(prefix)) 115 116 def StartCdataSectionHandler(self): 117 self.out.append('Start of CDATA section') 118 119 def EndCdataSectionHandler(self): 120 self.out.append('End of CDATA section') 121 122 def CommentHandler(self, text): 123 self.out.append('Comment: ' + repr(text)) 124 125 def NotationDeclHandler(self, *args): 126 name, base, sysid, pubid = args 127 self.out.append('Notation declared: %s' %(args,)) 128 129 def UnparsedEntityDeclHandler(self, 
*args): 130 entityName, base, systemId, publicId, notationName = args 131 self.out.append('Unparsed entity decl: %s' %(args,)) 132 133 def NotStandaloneHandler(self): 134 self.out.append('Not standalone') 135 return 1 136 137 def ExternalEntityRefHandler(self, *args): 138 context, base, sysId, pubId = args 139 self.out.append('External entity ref: %s' %(args[1:],)) 140 return 1 141 142 def StartDoctypeDeclHandler(self, *args): 143 self.out.append(('Start doctype', args)) 144 return 1 145 146 def EndDoctypeDeclHandler(self): 147 self.out.append("End doctype") 148 return 1 149 150 def EntityDeclHandler(self, *args): 151 self.out.append(('Entity declaration', args)) 152 return 1 153 154 def XmlDeclHandler(self, *args): 155 self.out.append(('XML declaration', args)) 156 return 1 157 158 def ElementDeclHandler(self, *args): 159 self.out.append(('Element declaration', args)) 160 return 1 161 162 def AttlistDeclHandler(self, *args): 163 self.out.append(('Attribute list declaration', args)) 164 return 1 165 166 def SkippedEntityHandler(self, *args): 167 self.out.append(("Skipped entity", args)) 168 return 1 169 170 def DefaultHandler(self, userData): 171 pass 172 173 def DefaultHandlerExpand(self, userData): 174 pass 175 176 handler_names = [ 177 'StartElementHandler', 'EndElementHandler', 'CharacterDataHandler', 178 'ProcessingInstructionHandler', 'UnparsedEntityDeclHandler', 179 'NotationDeclHandler', 'StartNamespaceDeclHandler', 180 'EndNamespaceDeclHandler', 'CommentHandler', 181 'StartCdataSectionHandler', 'EndCdataSectionHandler', 'DefaultHandler', 182 'DefaultHandlerExpand', 'NotStandaloneHandler', 183 'ExternalEntityRefHandler', 'StartDoctypeDeclHandler', 184 'EndDoctypeDeclHandler', 'EntityDeclHandler', 'XmlDeclHandler', 185 'ElementDeclHandler', 'AttlistDeclHandler', 'SkippedEntityHandler', 186 ] 187 188 def _hookup_callbacks(self, parser, handler): 189 """ 190 Set each of the callbacks defined on handler and named in 191 self.handler_names on the given parser. 
192 """ 193 for name in self.handler_names: 194 setattr(parser, name, getattr(handler, name)) 195 196 def _verify_parse_output(self, operations): 197 expected_operations = [ 198 ('XML declaration', ('1.0', 'iso-8859-1', 0)), 199 'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'', 200 "Comment: ' comment data '", 201 "Not standalone", 202 ("Start doctype", ('quotations', 'quotations.dtd', None, 1)), 203 ('Element declaration', ('root', (2, 0, None, ()))), 204 ('Attribute list declaration', ('root', 'attr1', 'CDATA', None, 205 1)), 206 ('Attribute list declaration', ('root', 'attr2', 'CDATA', None, 207 0)), 208 "Notation declared: ('notation', None, 'notation.jpeg', None)", 209 ('Entity declaration', ('acirc', 0, '\xe2', None, None, None, None)), 210 ('Entity declaration', ('external_entity', 0, None, None, 211 'entity.file', None, None)), 212 "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')", 213 "Not standalone", 214 "End doctype", 215 "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\u1f40'}", 216 "NS decl: 'myns' 'http://www.python.org/namespace'", 217 "Start element: 'http://www.python.org/namespace!subelement' {}", 218 "Character data: 'Contents of subelements'", 219 "End element: 'http://www.python.org/namespace!subelement'", 220 "End of NS decl: 'myns'", 221 "Start element: 'sub2' {}", 222 'Start of CDATA section', 223 "Character data: 'contents of CDATA section'", 224 'End of CDATA section', 225 "End element: 'sub2'", 226 "External entity ref: (None, 'entity.file', None)", 227 ('Skipped entity', ('skipped_entity', 0)), 228 "Character data: '\xb5'", 229 "End element: 'root'", 230 ] 231 for operation, expected_operation in zip(operations, expected_operations): 232 self.assertEqual(operation, expected_operation) 233 234 def test_parse_bytes(self): 235 out = self.Outputter() 236 parser = expat.ParserCreate(namespace_separator='!') 237 self._hookup_callbacks(parser, out) 238 239 parser.Parse(data, 1) 240 241 operations = 
out.out 242 self._verify_parse_output(operations) 243 # Issue #6697. 244 self.assertRaises(AttributeError, getattr, parser, '\uD800') 245 246 def test_parse_str(self): 247 out = self.Outputter() 248 parser = expat.ParserCreate(namespace_separator='!') 249 self._hookup_callbacks(parser, out) 250 251 parser.Parse(data.decode('iso-8859-1'), 1) 252 253 operations = out.out 254 self._verify_parse_output(operations) 255 256 def test_parse_file(self): 257 # Try parsing a file 258 out = self.Outputter() 259 parser = expat.ParserCreate(namespace_separator='!') 260 self._hookup_callbacks(parser, out) 261 file = BytesIO(data) 262 263 parser.ParseFile(file) 264 265 operations = out.out 266 self._verify_parse_output(operations) 267 268 def test_parse_again(self): 269 parser = expat.ParserCreate() 270 file = BytesIO(data) 271 parser.ParseFile(file) 272 # Issue 6676: ensure a meaningful exception is raised when attempting 273 # to parse more than one XML document per xmlparser instance, 274 # a limitation of the Expat library. 
275 with self.assertRaises(expat.error) as cm: 276 parser.ParseFile(file) 277 self.assertEqual(expat.ErrorString(cm.exception.code), 278 expat.errors.XML_ERROR_FINISHED) 279 280class NamespaceSeparatorTest(unittest.TestCase): 281 def test_legal(self): 282 # Tests that make sure we get errors when the namespace_separator value 283 # is illegal, and that we don't for good values: 284 expat.ParserCreate() 285 expat.ParserCreate(namespace_separator=None) 286 expat.ParserCreate(namespace_separator=' ') 287 288 def test_illegal(self): 289 try: 290 expat.ParserCreate(namespace_separator=42) 291 self.fail() 292 except TypeError as e: 293 self.assertEqual(str(e), 294 'ParserCreate() argument 2 must be str or None, not int') 295 296 try: 297 expat.ParserCreate(namespace_separator='too long') 298 self.fail() 299 except ValueError as e: 300 self.assertEqual(str(e), 301 'namespace_separator must be at most one character, omitted, or None') 302 303 def test_zero_length(self): 304 # ParserCreate() needs to accept a namespace_separator of zero length 305 # to satisfy the requirements of RDF applications that are required 306 # to simply glue together the namespace URI and the localname. Though 307 # considered a wart of the RDF specifications, it needs to be supported. 308 # 309 # See XML-SIG mailing list thread starting with 310 # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html 311 # 312 expat.ParserCreate(namespace_separator='') # too short 313 314 315class InterningTest(unittest.TestCase): 316 def test(self): 317 # Test the interning machinery. 318 p = expat.ParserCreate() 319 L = [] 320 def collector(name, *args): 321 L.append(name) 322 p.StartElementHandler = collector 323 p.EndElementHandler = collector 324 p.Parse(b"<e> <e/> <e></e> </e>", 1) 325 tag = L[0] 326 self.assertEqual(len(L), 6) 327 for entry in L: 328 # L should have the same string repeated over and over. 
329 self.assertTrue(tag is entry) 330 331 def test_issue9402(self): 332 # create an ExternalEntityParserCreate with buffer text 333 class ExternalOutputter: 334 def __init__(self, parser): 335 self.parser = parser 336 self.parser_result = None 337 338 def ExternalEntityRefHandler(self, context, base, sysId, pubId): 339 external_parser = self.parser.ExternalEntityParserCreate("") 340 self.parser_result = external_parser.Parse(b"", 1) 341 return 1 342 343 parser = expat.ParserCreate(namespace_separator='!') 344 parser.buffer_text = 1 345 out = ExternalOutputter(parser) 346 parser.ExternalEntityRefHandler = out.ExternalEntityRefHandler 347 parser.Parse(data, 1) 348 self.assertEqual(out.parser_result, 1) 349 350 351class BufferTextTest(unittest.TestCase): 352 def setUp(self): 353 self.stuff = [] 354 self.parser = expat.ParserCreate() 355 self.parser.buffer_text = 1 356 self.parser.CharacterDataHandler = self.CharacterDataHandler 357 358 def check(self, expected, label): 359 self.assertEqual(self.stuff, expected, 360 "%s\nstuff = %r\nexpected = %r" 361 % (label, self.stuff, map(str, expected))) 362 363 def CharacterDataHandler(self, text): 364 self.stuff.append(text) 365 366 def StartElementHandler(self, name, attrs): 367 self.stuff.append("<%s>" % name) 368 bt = attrs.get("buffer-text") 369 if bt == "yes": 370 self.parser.buffer_text = 1 371 elif bt == "no": 372 self.parser.buffer_text = 0 373 374 def EndElementHandler(self, name): 375 self.stuff.append("</%s>" % name) 376 377 def CommentHandler(self, data): 378 self.stuff.append("<!--%s-->" % data) 379 380 def setHandlers(self, handlers=[]): 381 for name in handlers: 382 setattr(self.parser, name, getattr(self, name)) 383 384 def test_default_to_disabled(self): 385 parser = expat.ParserCreate() 386 self.assertFalse(parser.buffer_text) 387 388 def test_buffering_enabled(self): 389 # Make sure buffering is turned on 390 self.assertTrue(self.parser.buffer_text) 391 self.parser.Parse(b"<a>1<b/>2<c/>3</a>", 1) 392 
self.assertEqual(self.stuff, ['123'], 393 "buffered text not properly collapsed") 394 395 def test1(self): 396 # XXX This test exposes more detail of Expat's text chunking than we 397 # XXX like, but it tests what we need to concisely. 398 self.setHandlers(["StartElementHandler"]) 399 self.parser.Parse(b"<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1) 400 self.assertEqual(self.stuff, 401 ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"], 402 "buffering control not reacting as expected") 403 404 def test2(self): 405 self.parser.Parse(b"<a>1<b/><2><c/> \n 3</a>", 1) 406 self.assertEqual(self.stuff, ["1<2> \n 3"], 407 "buffered text not properly collapsed") 408 409 def test3(self): 410 self.setHandlers(["StartElementHandler"]) 411 self.parser.Parse(b"<a>1<b/>2<c/>3</a>", 1) 412 self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"], 413 "buffered text not properly split") 414 415 def test4(self): 416 self.setHandlers(["StartElementHandler", "EndElementHandler"]) 417 self.parser.CharacterDataHandler = None 418 self.parser.Parse(b"<a>1<b/>2<c/>3</a>", 1) 419 self.assertEqual(self.stuff, 420 ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"]) 421 422 def test5(self): 423 self.setHandlers(["StartElementHandler", "EndElementHandler"]) 424 self.parser.Parse(b"<a>1<b></b>2<c/>3</a>", 1) 425 self.assertEqual(self.stuff, 426 ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"]) 427 428 def test6(self): 429 self.setHandlers(["CommentHandler", "EndElementHandler", 430 "StartElementHandler"]) 431 self.parser.Parse(b"<a>1<b/>2<c></c>345</a> ", 1) 432 self.assertEqual(self.stuff, 433 ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"], 434 "buffered text not properly split") 435 436 def test7(self): 437 self.setHandlers(["CommentHandler", "EndElementHandler", 438 "StartElementHandler"]) 439 self.parser.Parse(b"<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1) 440 self.assertEqual(self.stuff, 441 ["<a>", "1", "<b>", "</b>", "2", "<c>", 
"</c>", "3", 442 "<!--abc-->", "4", "<!--def-->", "5", "</a>"], 443 "buffered text not properly split") 444 445 446# Test handling of exception from callback: 447class HandlerExceptionTest(unittest.TestCase): 448 def StartElementHandler(self, name, attrs): 449 raise RuntimeError(name) 450 451 def check_traceback_entry(self, entry, filename, funcname): 452 self.assertEqual(os.path.basename(entry[0]), filename) 453 self.assertEqual(entry[2], funcname) 454 455 def test_exception(self): 456 parser = expat.ParserCreate() 457 parser.StartElementHandler = self.StartElementHandler 458 try: 459 parser.Parse(b"<a><b><c/></b></a>", 1) 460 self.fail() 461 except RuntimeError as e: 462 self.assertEqual(e.args[0], 'a', 463 "Expected RuntimeError for element 'a', but" + \ 464 " found %r" % e.args[0]) 465 # Check that the traceback contains the relevant line in pyexpat.c 466 entries = traceback.extract_tb(e.__traceback__) 467 self.assertEqual(len(entries), 3) 468 self.check_traceback_entry(entries[0], 469 "test_pyexpat.py", "test_exception") 470 self.check_traceback_entry(entries[1], 471 "pyexpat.c", "StartElement") 472 self.check_traceback_entry(entries[2], 473 "test_pyexpat.py", "StartElementHandler") 474 if sysconfig.is_python_build(): 475 self.assertIn('call_with_frame("StartElement"', entries[1][3]) 476 477 478# Test Current* members: 479class PositionTest(unittest.TestCase): 480 def StartElementHandler(self, name, attrs): 481 self.check_pos('s') 482 483 def EndElementHandler(self, name): 484 self.check_pos('e') 485 486 def check_pos(self, event): 487 pos = (event, 488 self.parser.CurrentByteIndex, 489 self.parser.CurrentLineNumber, 490 self.parser.CurrentColumnNumber) 491 self.assertTrue(self.upto < len(self.expected_list), 492 'too many parser events') 493 expected = self.expected_list[self.upto] 494 self.assertEqual(pos, expected, 495 'Expected position %s, got position %s' %(pos, expected)) 496 self.upto += 1 497 498 def test(self): 499 self.parser = expat.ParserCreate() 
500 self.parser.StartElementHandler = self.StartElementHandler 501 self.parser.EndElementHandler = self.EndElementHandler 502 self.upto = 0 503 self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2), 504 ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)] 505 506 xml = b'<a>\n <b>\n <c/>\n </b>\n</a>' 507 self.parser.Parse(xml, 1) 508 509 510class sf1296433Test(unittest.TestCase): 511 def test_parse_only_xml_data(self): 512 # http://python.org/sf/1296433 513 # 514 xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025) 515 # this one doesn't crash 516 #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000) 517 518 class SpecificException(Exception): 519 pass 520 521 def handler(text): 522 raise SpecificException 523 524 parser = expat.ParserCreate() 525 parser.CharacterDataHandler = handler 526 527 self.assertRaises(Exception, parser.Parse, xml.encode('iso8859')) 528 529class ChardataBufferTest(unittest.TestCase): 530 """ 531 test setting of chardata buffer size 532 """ 533 534 def test_1025_bytes(self): 535 self.assertEqual(self.small_buffer_test(1025), 2) 536 537 def test_1000_bytes(self): 538 self.assertEqual(self.small_buffer_test(1000), 1) 539 540 def test_wrong_size(self): 541 parser = expat.ParserCreate() 542 parser.buffer_text = 1 543 with self.assertRaises(ValueError): 544 parser.buffer_size = -1 545 with self.assertRaises(ValueError): 546 parser.buffer_size = 0 547 with self.assertRaises((ValueError, OverflowError)): 548 parser.buffer_size = sys.maxsize + 1 549 with self.assertRaises(TypeError): 550 parser.buffer_size = 512.0 551 552 def test_unchanged_size(self): 553 xml1 = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * 512 554 xml2 = b'a'*512 + b'</s>' 555 parser = expat.ParserCreate() 556 parser.CharacterDataHandler = self.counting_handler 557 parser.buffer_size = 512 558 parser.buffer_text = 1 559 560 # Feed 512 bytes of character data: the handler should be called 561 # once. 
562 self.n = 0 563 parser.Parse(xml1) 564 self.assertEqual(self.n, 1) 565 566 # Reassign to buffer_size, but assign the same size. 567 parser.buffer_size = parser.buffer_size 568 self.assertEqual(self.n, 1) 569 570 # Try parsing rest of the document 571 parser.Parse(xml2) 572 self.assertEqual(self.n, 2) 573 574 575 def test_disabling_buffer(self): 576 xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>" + b'a' * 512 577 xml2 = b'b' * 1024 578 xml3 = b'c' * 1024 + b'</a>'; 579 parser = expat.ParserCreate() 580 parser.CharacterDataHandler = self.counting_handler 581 parser.buffer_text = 1 582 parser.buffer_size = 1024 583 self.assertEqual(parser.buffer_size, 1024) 584 585 # Parse one chunk of XML 586 self.n = 0 587 parser.Parse(xml1, 0) 588 self.assertEqual(parser.buffer_size, 1024) 589 self.assertEqual(self.n, 1) 590 591 # Turn off buffering and parse the next chunk. 592 parser.buffer_text = 0 593 self.assertFalse(parser.buffer_text) 594 self.assertEqual(parser.buffer_size, 1024) 595 for i in range(10): 596 parser.Parse(xml2, 0) 597 self.assertEqual(self.n, 11) 598 599 parser.buffer_text = 1 600 self.assertTrue(parser.buffer_text) 601 self.assertEqual(parser.buffer_size, 1024) 602 parser.Parse(xml3, 1) 603 self.assertEqual(self.n, 12) 604 605 def counting_handler(self, text): 606 self.n += 1 607 608 def small_buffer_test(self, buffer_len): 609 xml = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * buffer_len + b'</s>' 610 parser = expat.ParserCreate() 611 parser.CharacterDataHandler = self.counting_handler 612 parser.buffer_size = 1024 613 parser.buffer_text = 1 614 615 self.n = 0 616 parser.Parse(xml) 617 return self.n 618 619 def test_change_size_1(self): 620 xml1 = b"<?xml version='1.0' encoding='iso8859'?><a><s>" + b'a' * 1024 621 xml2 = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>' 622 parser = expat.ParserCreate() 623 parser.CharacterDataHandler = self.counting_handler 624 parser.buffer_text = 1 625 parser.buffer_size = 1024 626 
self.assertEqual(parser.buffer_size, 1024) 627 628 self.n = 0 629 parser.Parse(xml1, 0) 630 parser.buffer_size *= 2 631 self.assertEqual(parser.buffer_size, 2048) 632 parser.Parse(xml2, 1) 633 self.assertEqual(self.n, 2) 634 635 def test_change_size_2(self): 636 xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>a<s>" + b'a' * 1023 637 xml2 = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>' 638 parser = expat.ParserCreate() 639 parser.CharacterDataHandler = self.counting_handler 640 parser.buffer_text = 1 641 parser.buffer_size = 2048 642 self.assertEqual(parser.buffer_size, 2048) 643 644 self.n=0 645 parser.Parse(xml1, 0) 646 parser.buffer_size = parser.buffer_size // 2 647 self.assertEqual(parser.buffer_size, 1024) 648 parser.Parse(xml2, 1) 649 self.assertEqual(self.n, 4) 650 651class MalformedInputTest(unittest.TestCase): 652 def test1(self): 653 xml = b"\0\r\n" 654 parser = expat.ParserCreate() 655 try: 656 parser.Parse(xml, True) 657 self.fail() 658 except expat.ExpatError as e: 659 self.assertEqual(str(e), 'unclosed token: line 2, column 0') 660 661 def test2(self): 662 # \xc2\x85 is UTF-8 encoded U+0085 (NEXT LINE) 663 xml = b"<?xml version\xc2\x85='1.0'?>\r\n" 664 parser = expat.ParserCreate() 665 try: 666 parser.Parse(xml, True) 667 self.fail() 668 except expat.ExpatError as e: 669 self.assertEqual(str(e), 'XML declaration not well-formed: line 1, column 14') 670 671class ErrorMessageTest(unittest.TestCase): 672 def test_codes(self): 673 # verify mapping of errors.codes and errors.messages 674 self.assertEqual(errors.XML_ERROR_SYNTAX, 675 errors.messages[errors.codes[errors.XML_ERROR_SYNTAX]]) 676 677 def test_expaterror(self): 678 xml = b'<' 679 parser = expat.ParserCreate() 680 try: 681 parser.Parse(xml, True) 682 self.fail() 683 except expat.ExpatError as e: 684 self.assertEqual(e.code, 685 errors.codes[errors.XML_ERROR_UNCLOSED_TOKEN]) 686 687 688class ForeignDTDTests(unittest.TestCase): 689 """ 690 Tests for the UseForeignDTD method of expat parser objects. 
691 """ 692 def test_use_foreign_dtd(self): 693 """ 694 If UseForeignDTD is passed True and a document without an external 695 entity reference is parsed, ExternalEntityRefHandler is first called 696 with None for the public and system ids. 697 """ 698 handler_call_args = [] 699 def resolve_entity(context, base, system_id, public_id): 700 handler_call_args.append((public_id, system_id)) 701 return 1 702 703 parser = expat.ParserCreate() 704 parser.UseForeignDTD(True) 705 parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS) 706 parser.ExternalEntityRefHandler = resolve_entity 707 parser.Parse(b"<?xml version='1.0'?><element/>") 708 self.assertEqual(handler_call_args, [(None, None)]) 709 710 # test UseForeignDTD() is equal to UseForeignDTD(True) 711 handler_call_args[:] = [] 712 713 parser = expat.ParserCreate() 714 parser.UseForeignDTD() 715 parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS) 716 parser.ExternalEntityRefHandler = resolve_entity 717 parser.Parse(b"<?xml version='1.0'?><element/>") 718 self.assertEqual(handler_call_args, [(None, None)]) 719 720 def test_ignore_use_foreign_dtd(self): 721 """ 722 If UseForeignDTD is passed True and a document with an external 723 entity reference is parsed, ExternalEntityRefHandler is called with 724 the public and system ids from the document. 725 """ 726 handler_call_args = [] 727 def resolve_entity(context, base, system_id, public_id): 728 handler_call_args.append((public_id, system_id)) 729 return 1 730 731 parser = expat.ParserCreate() 732 parser.UseForeignDTD(True) 733 parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS) 734 parser.ExternalEntityRefHandler = resolve_entity 735 parser.Parse( 736 b"<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>") 737 self.assertEqual(handler_call_args, [("bar", "baz")]) 738 739 740if __name__ == "__main__": 741 unittest.main() 742