test_pyexpat.py revision 6c8b66cd261c6418566700527784b17bb459db1f
1# XXX TypeErrors on calling handlers, or on bad return values from a
2# handler, are obscure and unhelpful.
3
4from io import BytesIO
5import os
6import sys
7import sysconfig
8import unittest
9import traceback
10
11from xml.parsers import expat
12from xml.parsers.expat import errors
13
14from test.support import sortdict
15
16
class SetAttributeTest(unittest.TestCase):
    """Check assignment to the configuration attributes of a parser object."""

    def setUp(self):
        self.parser = expat.ParserCreate(namespace_separator='!')

    def _check_boolean_attribute(self, name):
        """Verify that attribute *name* starts as False and coerces to bool.

        Each assigned value must read back as the identical ``True`` or
        ``False`` object; the trailing 0 confirms that the attribute can be
        switched off again after having been enabled.
        """
        self.assertIs(getattr(self.parser, name), False)
        for value in 0, 1, 2, 0:
            setattr(self.parser, name, value)
            self.assertIs(getattr(self.parser, name), bool(value))

    def test_buffer_text(self):
        self._check_boolean_attribute('buffer_text')

    def test_namespace_prefixes(self):
        self._check_boolean_attribute('namespace_prefixes')

    def test_ordered_attributes(self):
        self._check_boolean_attribute('ordered_attributes')

    def test_specified_attributes(self):
        # This test used to be defined twice with an identical body; the
        # second definition silently replaced the first, so one is enough.
        self._check_boolean_attribute('specified_attributes')

    def test_invalid_attributes(self):
        # Both writing and reading an unknown attribute must be rejected.
        with self.assertRaises(AttributeError):
            self.parser.returns_unicode = 1
        with self.assertRaises(AttributeError):
            self.parser.returns_unicode

        # Issue #25019: a non-string attribute name must raise TypeError.
        self.assertRaises(TypeError, setattr, self.parser, range(0xF), 0)
        self.assertRaises(TypeError, self.parser.__setattr__, range(0xF), 0)
        self.assertRaises(TypeError, getattr, self.parser, range(0xF))
61
62
# Sample document shared by the parsing tests in this module.  It is a bytes
# literal declared as ISO-8859-1 and contains a stylesheet processing
# instruction, a comment, a DOCTYPE with an internal subset (element,
# attribute-list, notation and entity declarations), a namespaced element,
# a CDATA section, a reference to an external entity, a reference to an
# entity that is not declared here, and one non-ASCII byte (0xB5).
data = b'''\
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!ATTLIST root attr1 CDATA #REQUIRED attr2 CDATA #IMPLIED>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
     Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
&skipped_entity;
\xb5
</root>
'''
87
88
89# Produce UTF-8 output
class ParseTest(unittest.TestCase):
    """Parse the sample document and check the sequence of callbacks."""

    class Outputter:
        """Set of handlers that record every callback in ``self.out``.

        Some handlers append a formatted string and others append a
        ``(label, args)`` tuple; the expected list in
        ``_verify_parse_output`` mirrors that mix.
        """

        def __init__(self):
            self.out = []

        def StartElementHandler(self, name, attrs):
            self.out.append('Start element: ' + repr(name) + ' ' +
                            sortdict(attrs))

        def EndElementHandler(self, name):
            self.out.append('End element: ' + repr(name))

        def CharacterDataHandler(self, data):
            # Whitespace-only chunks are dropped so that the recorded output
            # contains only meaningful text.
            data = data.strip()
            if data:
                self.out.append('Character data: ' + repr(data))

        def ProcessingInstructionHandler(self, target, data):
            self.out.append('PI: ' + repr(target) + ' ' + repr(data))

        def StartNamespaceDeclHandler(self, prefix, uri):
            self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri))

        def EndNamespaceDeclHandler(self, prefix):
            self.out.append('End of NS decl: ' + repr(prefix))

        def StartCdataSectionHandler(self):
            self.out.append('Start of CDATA section')

        def EndCdataSectionHandler(self):
            self.out.append('End of CDATA section')

        def CommentHandler(self, text):
            self.out.append('Comment: ' + repr(text))

        def NotationDeclHandler(self, *args):
            # The unpacking doubles as a check on the number of arguments.
            name, base, sysid, pubid = args
            self.out.append('Notation declared: %s' %(args,))

        def UnparsedEntityDeclHandler(self, *args):
            # The unpacking doubles as a check on the number of arguments.
            entityName, base, systemId, publicId, notationName = args
            self.out.append('Unparsed entity decl: %s' %(args,))

        def NotStandaloneHandler(self):
            self.out.append('Not standalone')
            return 1

        def ExternalEntityRefHandler(self, *args):
            # The unpacking doubles as a check on the number of arguments;
            # the context (first argument) is left out of the record.
            context, base, sysId, pubId = args
            self.out.append('External entity ref: %s' %(args[1:],))
            return 1

        def StartDoctypeDeclHandler(self, *args):
            self.out.append(('Start doctype', args))
            return 1

        def EndDoctypeDeclHandler(self):
            self.out.append("End doctype")
            return 1

        def EntityDeclHandler(self, *args):
            self.out.append(('Entity declaration', args))
            return 1

        def XmlDeclHandler(self, *args):
            self.out.append(('XML declaration', args))
            return 1

        def ElementDeclHandler(self, *args):
            self.out.append(('Element declaration', args))
            return 1

        def AttlistDeclHandler(self, *args):
            self.out.append(('Attribute list declaration', args))
            return 1

        def SkippedEntityHandler(self, *args):
            self.out.append(("Skipped entity", args))
            return 1

        def DefaultHandler(self, userData):
            pass

        def DefaultHandlerExpand(self, userData):
            pass

    # Names of the parser attributes that _hookup_callbacks() assigns.
    handler_names = [
        'StartElementHandler', 'EndElementHandler', 'CharacterDataHandler',
        'ProcessingInstructionHandler', 'UnparsedEntityDeclHandler',
        'NotationDeclHandler', 'StartNamespaceDeclHandler',
        'EndNamespaceDeclHandler', 'CommentHandler',
        'StartCdataSectionHandler', 'EndCdataSectionHandler', 'DefaultHandler',
        'DefaultHandlerExpand', 'NotStandaloneHandler',
        'ExternalEntityRefHandler', 'StartDoctypeDeclHandler',
        'EndDoctypeDeclHandler', 'EntityDeclHandler', 'XmlDeclHandler',
        'ElementDeclHandler', 'AttlistDeclHandler', 'SkippedEntityHandler',
        ]

    def _hookup_callbacks(self, parser, handler):
        """
        Set each of the callbacks defined on handler and named in
        self.handler_names on the given parser.
        """
        for name in self.handler_names:
            setattr(parser, name, getattr(handler, name))

    def _verify_parse_output(self, operations):
        """Compare the recorded *operations* with the expected sequence.

        Entries are compared one by one so that a failure names the first
        differing operation, and the lengths are compared afterwards so that
        a truncated (or empty) list of operations cannot pass unnoticed.
        """
        expected_operations = [
            ('XML declaration', ('1.0', 'iso-8859-1', 0)),
            'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'',
            "Comment: ' comment data '",
            "Not standalone",
            ("Start doctype", ('quotations', 'quotations.dtd', None, 1)),
            ('Element declaration', ('root', (2, 0, None, ()))),
            ('Attribute list declaration', ('root', 'attr1', 'CDATA', None,
                1)),
            ('Attribute list declaration', ('root', 'attr2', 'CDATA', None,
                0)),
            "Notation declared: ('notation', None, 'notation.jpeg', None)",
            ('Entity declaration', ('acirc', 0, '\xe2', None, None, None, None)),
            ('Entity declaration', ('external_entity', 0, None, None,
                'entity.file', None, None)),
            "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')",
            "Not standalone",
            "End doctype",
            "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\u1f40'}",
            "NS decl: 'myns' 'http://www.python.org/namespace'",
            "Start element: 'http://www.python.org/namespace!subelement' {}",
            "Character data: 'Contents of subelements'",
            "End element: 'http://www.python.org/namespace!subelement'",
            "End of NS decl: 'myns'",
            "Start element: 'sub2' {}",
            'Start of CDATA section',
            "Character data: 'contents of CDATA section'",
            'End of CDATA section',
            "End element: 'sub2'",
            "External entity ref: (None, 'entity.file', None)",
            ('Skipped entity', ('skipped_entity', 0)),
            "Character data: '\xb5'",
            "End element: 'root'",
        ]
        for index, (operation, expected_operation) in enumerate(
                zip(operations, expected_operations)):
            self.assertEqual(operation, expected_operation,
                             'mismatch at operation %d' % index)
        # zip() stops at the shorter sequence, so missing or surplus
        # operations have to be detected separately.
        self.assertEqual(len(operations), len(expected_operations))

    def test_parse_bytes(self):
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, out)

        parser.Parse(data, 1)

        operations = out.out
        self._verify_parse_output(operations)
        # Issue #6697.
        self.assertRaises(AttributeError, getattr, parser, '\uD800')

    def test_parse_str(self):
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, out)

        parser.Parse(data.decode('iso-8859-1'), 1)

        operations = out.out
        self._verify_parse_output(operations)

    def test_parse_file(self):
        # Try parsing a file
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, out)
        file = BytesIO(data)

        parser.ParseFile(file)

        operations = out.out
        self._verify_parse_output(operations)

    def test_parse_again(self):
        parser = expat.ParserCreate()
        file = BytesIO(data)
        parser.ParseFile(file)
        # Issue 6676: ensure a meaningful exception is raised when attempting
        # to parse more than one XML document per xmlparser instance,
        # a limitation of the Expat library.
        with self.assertRaises(expat.error) as cm:
            parser.ParseFile(file)
        self.assertEqual(expat.ErrorString(cm.exception.code),
                          expat.errors.XML_ERROR_FINISHED)
279
class NamespaceSeparatorTest(unittest.TestCase):
    """Check which namespace_separator values ParserCreate() accepts."""

    def test_legal(self):
        # Tests that make sure we get errors when the namespace_separator value
        # is illegal, and that we don't for good values:
        expat.ParserCreate()
        expat.ParserCreate(namespace_separator=None)
        expat.ParserCreate(namespace_separator=' ')

    def test_illegal(self):
        # The argument may be identified by position or by name in the
        # TypeError message; both spellings are accepted, everything else in
        # the message is matched exactly.
        with self.assertRaisesRegex(
                TypeError,
                r"^ParserCreate\(\) argument (2|'namespace_separator') "
                r"must be str or None, not int$"):
            expat.ParserCreate(namespace_separator=42)

        with self.assertRaisesRegex(
                ValueError,
                '^namespace_separator must be at most one character, '
                'omitted, or None$'):
            expat.ParserCreate(namespace_separator='too long')

    def test_zero_length(self):
        # ParserCreate() needs to accept a namespace_separator of zero length
        # to satisfy the requirements of RDF applications that are required
        # to simply glue together the namespace URI and the localname.  Though
        # considered a wart of the RDF specifications, it needs to be supported.
        #
        # See XML-SIG mailing list thread starting with
        # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
        #
        expat.ParserCreate(namespace_separator='') # too short
313
314
class InterningTest(unittest.TestCase):
    """Check that element names handed to callbacks are shared objects."""

    def test(self):
        # Test the interning machinery.
        p = expat.ParserCreate()
        names = []
        def collector(name, *args):
            names.append(name)
        p.StartElementHandler = collector
        p.EndElementHandler = collector
        p.Parse(b"<e> <e/> <e></e> </e>", 1)
        # Check the count first so that missing events are reported as a
        # test failure rather than as an IndexError on names[0].
        self.assertEqual(len(names), 6)
        tag = names[0]
        for entry in names:
            # names should have the same string object repeated over and over.
            self.assertIs(entry, tag)

    def test_issue9402(self):
        # create an ExternalEntityParserCreate with buffer text
        class ExternalOutputter:
            def __init__(self, parser):
                self.parser = parser
                self.parser_result = None

            def ExternalEntityRefHandler(self, context, base, sysId, pubId):
                # Parse an empty document with a sub-parser derived from the
                # parent and remember what Parse() returned.
                external_parser = self.parser.ExternalEntityParserCreate("")
                self.parser_result = external_parser.Parse(b"", 1)
                return 1

        parser = expat.ParserCreate(namespace_separator='!')
        parser.buffer_text = 1
        out = ExternalOutputter(parser)
        parser.ExternalEntityRefHandler = out.ExternalEntityRefHandler
        parser.Parse(data, 1)
        self.assertEqual(out.parser_result, 1)
349
350
class BufferTextTest(unittest.TestCase):
    """Check how the buffer_text attribute groups character data.

    The test case itself acts as the handler object: every callback appends
    a string to ``self.stuff``, and each test compares that list with the
    expected sequence.
    """

    def setUp(self):
        self.stuff = []
        self.parser = expat.ParserCreate()
        self.parser.buffer_text = 1
        self.parser.CharacterDataHandler = self.CharacterDataHandler

    def check(self, expected, label):
        """Assert that the recorded events equal *expected*.

        *label* is included in the failure message.  The expected items are
        materialised into a list so that the message shows their values
        instead of the repr of a lazy map object.
        """
        self.assertEqual(self.stuff, expected,
                "%s\nstuff    = %r\nexpected = %r"
                % (label, self.stuff, list(map(str, expected))))

    def CharacterDataHandler(self, text):
        self.stuff.append(text)

    def StartElementHandler(self, name, attrs):
        self.stuff.append("<%s>" % name)
        # A "buffer-text" attribute on an element toggles buffering from
        # inside the callback.
        bt = attrs.get("buffer-text")
        if bt == "yes":
            self.parser.buffer_text = 1
        elif bt == "no":
            self.parser.buffer_text = 0

    def EndElementHandler(self, name):
        self.stuff.append("</%s>" % name)

    def CommentHandler(self, data):
        self.stuff.append("<!--%s-->" % data)

    def setHandlers(self, handlers=()):
        """Install the methods of this test case named in *handlers*.

        The default is an immutable empty tuple rather than a shared
        mutable list.
        """
        for name in handlers:
            setattr(self.parser, name, getattr(self, name))

    def test_default_to_disabled(self):
        parser = expat.ParserCreate()
        self.assertFalse(parser.buffer_text)

    def test_buffering_enabled(self):
        # Make sure buffering is turned on
        self.assertTrue(self.parser.buffer_text)
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ['123'],
                         "buffered text not properly collapsed")

    def test1(self):
        # XXX This test exposes more detail of Expat's text chunking than we
        # XXX like, but it tests what we need to concisely.
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse(b"<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
                         "buffering control not reacting as expected")

    def test2(self):
        self.parser.Parse(b"<a>1<b/>&lt;2&gt;<c/>&#32;\n&#x20;3</a>", 1)
        self.assertEqual(self.stuff, ["1<2> \n 3"],
                         "buffered text not properly collapsed")

    def test3(self):
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"],
                         "buffered text not properly split")

    def test4(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        # With no character data handler only the element events remain.
        self.parser.CharacterDataHandler = None
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])

    def test5(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.Parse(b"<a>1<b></b>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])

    def test6(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c></c>345</a> ", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
            "buffered text not properly split")

    def test7(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
                          "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
                         "buffered text not properly split")
444
445
446# Test handling of exception from callback:
class HandlerExceptionTest(unittest.TestCase):
    """Check that an exception raised in a callback reaches the caller."""

    def StartElementHandler(self, name, attrs):
        raise RuntimeError(name)

    def check_traceback_entry(self, entry, filename, funcname):
        """Assert that traceback *entry* names *filename* and *funcname*.

        Only the base name of the file recorded in the entry is compared.
        """
        self.assertEqual(os.path.basename(entry[0]), filename)
        self.assertEqual(entry[2], funcname)

    def test_exception(self):
        parser = expat.ParserCreate()
        parser.StartElementHandler = self.StartElementHandler
        # Derive the expected file name from this module instead of
        # hard-coding it, so the check survives the file being renamed.
        this_file = os.path.basename(__file__)
        # A plain try/except is used (not assertRaises) because the
        # traceback attached to the exception is inspected below.
        try:
            parser.Parse(b"<a><b><c/></b></a>", 1)
        except RuntimeError as e:
            self.assertEqual(e.args[0], 'a',
                             "Expected RuntimeError for element 'a', but" + \
                             " found %r" % e.args[0])
            # Check that the traceback contains the relevant line in pyexpat.c
            entries = traceback.extract_tb(e.__traceback__)
            self.assertEqual(len(entries), 3)
            self.check_traceback_entry(entries[0],
                                       this_file, "test_exception")
            self.check_traceback_entry(entries[1],
                                       "pyexpat.c", "StartElement")
            self.check_traceback_entry(entries[2],
                                       this_file, "StartElementHandler")
            # The source line is only checked when running from a build tree.
            if sysconfig.is_python_build():
                self.assertIn('call_with_frame("StartElement"', entries[1][3])
        else:
            self.fail("RuntimeError raised by the handler was not propagated")
476
477
478# Test Current* members:
class PositionTest(unittest.TestCase):
    """Check the position attributes of the parser during callbacks."""

    def StartElementHandler(self, name, attrs):
        self.check_pos('s')

    def EndElementHandler(self, name):
        self.check_pos('e')

    def check_pos(self, event):
        """Compare the current parser position with the next expected one.

        The observed tuple is ``(event, CurrentByteIndex, CurrentLineNumber,
        CurrentColumnNumber)``; ``self.upto`` indexes the next entry of
        ``self.expected_list`` and is advanced after a successful match.
        """
        pos = (event,
               self.parser.CurrentByteIndex,
               self.parser.CurrentLineNumber,
               self.parser.CurrentColumnNumber)
        self.assertLess(self.upto, len(self.expected_list),
                        'too many parser events')
        expected = self.expected_list[self.upto]
        # The message names the expected value first and the observed one
        # second, matching the wording of the format string.
        self.assertEqual(pos, expected,
                'Expected position %s, got position %s' % (expected, pos))
        self.upto += 1

    def test(self):
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.StartElementHandler
        self.parser.EndElementHandler = self.EndElementHandler
        self.upto = 0
        self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
                              ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]

        xml = b'<a>\n <b>\n  <c/>\n </b>\n</a>'
        self.parser.Parse(xml, 1)
        # Without this check a parser that reported fewer events than
        # expected (or none at all) would pass.
        self.assertEqual(self.upto, len(self.expected_list),
                         'too few parser events')
508
509
class sf1296433Test(unittest.TestCase):
    """Regression test for http://python.org/sf/1296433."""

    def test_parse_only_xml_data(self):
        # http://python.org/sf/1296433
        #
        xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025)
        # this one doesn't crash
        #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

        class SpecificException(Exception):
            pass

        def handler(text):
            raise SpecificException

        parser = expat.ParserCreate()
        parser.CharacterDataHandler = handler

        # Expect exactly the exception raised by the handler; accepting any
        # Exception would also let unrelated failures pass.
        self.assertRaises(SpecificException, parser.Parse,
                          xml.encode('iso8859'))
528
class ChardataBufferTest(unittest.TestCase):
    """
    Exercise the buffer_size attribute used for character data buffering.

    The tests count how often the character data handler is invoked;
    counting_handler() keeps that count in self.n.
    """

    def test_1025_bytes(self):
        calls = self.small_buffer_test(1025)
        self.assertEqual(calls, 2)

    def test_1000_bytes(self):
        calls = self.small_buffer_test(1000)
        self.assertEqual(calls, 1)

    def test_wrong_size(self):
        parser = expat.ParserCreate()
        parser.buffer_text = 1
        # Negative and zero sizes are rejected with ValueError.
        for bad_size in (-1, 0):
            with self.assertRaises(ValueError):
                parser.buffer_size = bad_size
        # A size beyond sys.maxsize may be reported as either error type.
        with self.assertRaises((ValueError, OverflowError)):
            parser.buffer_size = sys.maxsize + 1
        # A float is not accepted even when it holds an integral value.
        with self.assertRaises(TypeError):
            parser.buffer_size = 512.0

    def test_unchanged_size(self):
        head = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * 512
        tail = b'a'*512 + b'</s>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_size = 512
        parser.buffer_text = 1

        # 512 bytes of character data are expected to produce exactly one
        # handler call.
        self.n = 0
        parser.Parse(head)
        self.assertEqual(self.n, 1)

        # Assigning the current value back to buffer_size must not trigger
        # another handler call.
        parser.buffer_size = parser.buffer_size
        self.assertEqual(self.n, 1)

        # The remainder of the document yields the second call.
        parser.Parse(tail)
        self.assertEqual(self.n, 2)

    def test_disabling_buffer(self):
        first_chunk = b"<?xml version='1.0' encoding='iso8859'?><a>" + b'a' * 512
        middle_chunk = b'b' * 1024
        last_chunk = b'c' * 1024 + b'</a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        # First chunk, parsed with buffering enabled.
        self.n = 0
        parser.Parse(first_chunk, 0)
        self.assertEqual(parser.buffer_size, 1024)
        self.assertEqual(self.n, 1)

        # Switch buffering off; buffer_size keeps its value, and each of the
        # ten following chunks is expected to add one handler call.
        parser.buffer_text = 0
        self.assertFalse(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        for _ in range(10):
            parser.Parse(middle_chunk, 0)
        self.assertEqual(self.n, 11)

        # Switch buffering back on for the final chunk.
        parser.buffer_text = 1
        self.assertTrue(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(last_chunk, 1)
        self.assertEqual(self.n, 12)

    def counting_handler(self, text):
        # Only the number of invocations matters; the text is ignored.
        self.n += 1

    def small_buffer_test(self, buffer_len):
        """Parse *buffer_len* characters of text with a 1024-byte buffer.

        Returns the number of times the character data handler was called.
        """
        document = (b"<?xml version='1.0' encoding='iso8859'?><s>"
                    + b'a' * buffer_len
                    + b'</s>')
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_size = 1024
        parser.buffer_text = 1

        self.n = 0
        parser.Parse(document)
        return self.n

    def test_change_size_1(self):
        # Double the buffer size between two Parse() calls.
        first_part = b"<?xml version='1.0' encoding='iso8859'?><a><s>" + b'a' * 1024
        second_part = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        self.n = 0
        parser.Parse(first_part, 0)
        parser.buffer_size = parser.buffer_size * 2
        self.assertEqual(parser.buffer_size, 2048)
        parser.Parse(second_part, 1)
        self.assertEqual(self.n, 2)

    def test_change_size_2(self):
        # Halve the buffer size between two Parse() calls.
        first_part = b"<?xml version='1.0' encoding='iso8859'?><a>a<s>" + b'a' * 1023
        second_part = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 2048
        self.assertEqual(parser.buffer_size, 2048)

        self.n = 0
        parser.Parse(first_part, 0)
        parser.buffer_size //= 2
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(second_part, 1)
        self.assertEqual(self.n, 4)
650
class MalformedInputTest(unittest.TestCase):
    """Check the error messages reported for malformed input."""

    def test1(self):
        xml = b"\0\r\n"
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as cm:
            parser.Parse(xml, True)
        self.assertEqual(str(cm.exception),
                         'unclosed token: line 2, column 0')

    def test2(self):
        # \xc2\x85 is UTF-8 encoded U+0085 (NEXT LINE)
        xml = b"<?xml version\xc2\x85='1.0'?>\r\n"
        parser = expat.ParserCreate()
        # NOTE(review): the column number is matched loosely because it is
        # believed to differ between Expat releases (this test previously
        # required column 14) - confirm before relying on an exact value.
        err_pattern = r'^XML declaration not well-formed: line 1, column \d+$'
        with self.assertRaisesRegex(expat.ExpatError, err_pattern):
            parser.Parse(xml, True)
670
class ErrorMessageTest(unittest.TestCase):
    """Check the error tables exposed by xml.parsers.expat.errors."""

    def test_codes(self):
        # Going from message to code and back must give the same message.
        message = errors.XML_ERROR_SYNTAX
        self.assertEqual(message, errors.messages[errors.codes[message]])

    def test_expaterror(self):
        # A lone '<' is an unclosed token; the raised error must carry the
        # matching numeric code.
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as caught:
            parser.Parse(b'<', True)
        expected_code = errors.codes[errors.XML_ERROR_UNCLOSED_TOKEN]
        self.assertEqual(caught.exception.code, expected_code)
686
687
class ForeignDTDTests(unittest.TestCase):
    """
    Exercise the UseForeignDTD method of expat parser objects.
    """
    def test_use_foreign_dtd(self):
        """
        With UseForeignDTD enabled and a document that has no external
        entity reference, ExternalEntityRefHandler is called once with
        None for both the public and the system id.
        """
        seen_ids = []
        def resolve_entity(context, base, system_id, public_id):
            seen_ids.append((public_id, system_id))
            return 1

        # First pass: explicit True.  Second pass: no argument, which must
        # be equal to UseForeignDTD(True).
        for call_args in ((True,), ()):
            del seen_ids[:]

            parser = expat.ParserCreate()
            parser.UseForeignDTD(*call_args)
            parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
            parser.ExternalEntityRefHandler = resolve_entity
            parser.Parse(b"<?xml version='1.0'?><element/>")
            self.assertEqual(seen_ids, [(None, None)])

    def test_ignore_use_foreign_dtd(self):
        """
        With UseForeignDTD enabled and a document that does have an
        external entity reference, ExternalEntityRefHandler receives the
        public and system ids given in the document.
        """
        seen_ids = []
        def resolve_entity(context, base, system_id, public_id):
            seen_ids.append((public_id, system_id))
            return 1

        document = (
            b"<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>")
        parser = expat.ParserCreate()
        parser.UseForeignDTD(True)
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.ExternalEntityRefHandler = resolve_entity
        parser.Parse(document)
        self.assertEqual(seen_ids, [("bar", "baz")])
738
739
# Run the test cases defined in this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()
742