183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# XXX TypeErrors on calling handlers, or on bad return values from a
283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# handler, are obscure and unhelpful.
383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehimport StringIO, sys
583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehimport unittest
683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehfrom xml.parsers import expat
883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehfrom test import test_support
1083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehfrom test.test_support import sortdict, run_unittest
1183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
1283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
1383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehclass SetAttributeTest(unittest.TestCase):
1483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def setUp(self):
1583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.parser = expat.ParserCreate(namespace_separator='!')
1683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.set_get_pairs = [
1783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            [0, 0],
1883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            [1, 1],
1983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            [2, 1],
2083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            [0, 0],
2183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            ]
2283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
2383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def test_returns_unicode(self):
2483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        for x, y in self.set_get_pairs:
2583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            self.parser.returns_unicode = x
2683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            self.assertEqual(self.parser.returns_unicode, y)
2783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
2883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def test_ordered_attributes(self):
2983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        for x, y in self.set_get_pairs:
3083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            self.parser.ordered_attributes = x
3183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            self.assertEqual(self.parser.ordered_attributes, y)
3283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
3383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def test_specified_attributes(self):
3483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        for x, y in self.set_get_pairs:
3583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            self.parser.specified_attributes = x
3683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            self.assertEqual(self.parser.specified_attributes, y)
3783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
3883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
# Sample document parsed by the ParseTest cases.  It contains an XML
# declaration, a processing instruction, a comment, a DOCTYPE with an
# internal subset (element, notation and entity declarations), a root
# element with attributes, a namespaced child element, a CDATA section and
# an external entity reference.
data = '''\
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
     Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
</root>
'''
6083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
6183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
# Parse the sample document and verify the handler trace (UTF-8 output,
# Unicode output, and parsing from a file object).
class ParseTest(unittest.TestCase):
    """Parse the sample document and check the sequence of callbacks.

    Every handler listed in ``handler_names`` is bound to an ``Outputter``
    instance, which appends one line of text per callback to its ``out``
    list.  The tests compare that trace, entry by entry, with the expected
    one.
    """

    class Outputter:
        """Records a textual trace of parser callbacks in ``self.out``."""

        def __init__(self):
            self.out = []

        def StartElementHandler(self, name, attrs):
            self.out.append('Start element: ' + repr(name) + ' ' +
                            sortdict(attrs))

        def EndElementHandler(self, name):
            self.out.append('End element: ' + repr(name))

        def CharacterDataHandler(self, data):
            # Whitespace-only chunks are dropped from the trace.
            data = data.strip()
            if data:
                self.out.append('Character data: ' + repr(data))

        def ProcessingInstructionHandler(self, target, data):
            self.out.append('PI: ' + repr(target) + ' ' + repr(data))

        def StartNamespaceDeclHandler(self, prefix, uri):
            self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri))

        def EndNamespaceDeclHandler(self, prefix):
            self.out.append('End of NS decl: ' + repr(prefix))

        def StartCdataSectionHandler(self):
            self.out.append('Start of CDATA section')

        def EndCdataSectionHandler(self):
            self.out.append('End of CDATA section')

        def CommentHandler(self, text):
            self.out.append('Comment: ' + repr(text))

        def NotationDeclHandler(self, *args):
            # The unpacking doubles as a check that exactly four
            # arguments were passed.
            name, base, sysid, pubid = args
            self.out.append('Notation declared: %s' %(args,))

        def UnparsedEntityDeclHandler(self, *args):
            # The unpacking doubles as a check that exactly five
            # arguments were passed.
            entityName, base, systemId, publicId, notationName = args
            self.out.append('Unparsed entity decl: %s' %(args,))

        def NotStandaloneHandler(self, userData=None):
            # The documented NotStandaloneHandler callback takes no
            # arguments; userData stays as an optional parameter so the
            # method works both when registered and for existing callers.
            self.out.append('Not standalone')
            return 1

        def ExternalEntityRefHandler(self, *args):
            # The unpacking doubles as a check that exactly four
            # arguments were passed; the context (first) argument is left
            # out of the trace.
            context, base, sysId, pubId = args
            self.out.append('External entity ref: %s' %(args[1:],))
            return 1

        def DefaultHandler(self, userData):
            pass

        def DefaultHandlerExpand(self, userData):
            pass

    handler_names = [
        'StartElementHandler', 'EndElementHandler',
        'CharacterDataHandler', 'ProcessingInstructionHandler',
        'UnparsedEntityDeclHandler', 'NotationDeclHandler',
        'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler',
        'CommentHandler', 'StartCdataSectionHandler',
        'EndCdataSectionHandler',
        'DefaultHandler', 'DefaultHandlerExpand',
        #'NotStandaloneHandler',
        'ExternalEntityRefHandler'
        ]

    # Trace expected when the parser hands byte strings to the handlers.
    _expected_utf8_trace = [
        'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'',
        "Comment: ' comment data '",
        "Notation declared: ('notation', None, 'notation.jpeg', None)",
        "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')",
        "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\\xe1\\xbd\\x80'}",
        "NS decl: 'myns' 'http://www.python.org/namespace'",
        "Start element: 'http://www.python.org/namespace!subelement' {}",
        "Character data: 'Contents of subelements'",
        "End element: 'http://www.python.org/namespace!subelement'",
        "End of NS decl: 'myns'",
        "Start element: 'sub2' {}",
        'Start of CDATA section',
        "Character data: 'contents of CDATA section'",
        'End of CDATA section',
        "End element: 'sub2'",
        "External entity ref: (None, 'entity.file', None)",
        "End element: 'root'",
        ]

    # Trace expected when the parser hands Unicode strings to the handlers.
    _expected_unicode_trace = [
        'PI: u\'xml-stylesheet\' u\'href="stylesheet.css"\'',
        "Comment: u' comment data '",
        "Notation declared: (u'notation', None, u'notation.jpeg', None)",
        "Unparsed entity decl: (u'unparsed_entity', None, u'entity.file', None, u'notation')",
        "Start element: u'root' {u'attr1': u'value1', u'attr2': u'value2\\u1f40'}",
        "NS decl: u'myns' u'http://www.python.org/namespace'",
        "Start element: u'http://www.python.org/namespace!subelement' {}",
        "Character data: u'Contents of subelements'",
        "End element: u'http://www.python.org/namespace!subelement'",
        "End of NS decl: u'myns'",
        "Start element: u'sub2' {}",
        'Start of CDATA section',
        "Character data: u'contents of CDATA section'",
        'End of CDATA section',
        "End element: u'sub2'",
        "External entity ref: (None, u'entity.file', None)",
        "End element: u'root'",
        ]

    def _bind_handlers(self, parser, out):
        # Attach every handler in handler_names from *out* to *parser*.
        for name in self.handler_names:
            setattr(parser, name, getattr(out, name))

    def _verify_trace(self, trace, expected):
        # Compare entry by entry so that a failure points at the first
        # callback that differs.
        for index, line in enumerate(expected):
            self.assertEqual(trace[index], line)

    def test_utf8(self):
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._bind_handlers(parser, out)
        parser.returns_unicode = 0
        parser.Parse(data, 1)

        # Verify output
        self._verify_trace(out.out, self._expected_utf8_trace)

    def test_unicode(self):
        # Try the parse again, this time producing Unicode output
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        parser.returns_unicode = 1
        self._bind_handlers(parser, out)

        parser.Parse(data, 1)

        self._verify_trace(out.out, self._expected_unicode_trace)

    def test_parse_file(self):
        # Try parsing a file
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        parser.returns_unicode = 1
        self._bind_handlers(parser, out)
        stream = StringIO.StringIO(data)

        parser.ParseFile(stream)

        self._verify_trace(out.out, self._expected_unicode_trace)

        # Issue 4877: expat.ParseFile causes segfault on a closed file.
        fp = open(test_support.TESTFN, 'wb')
        try:
            fp.close()
            parser = expat.ParserCreate()
            with self.assertRaises(ValueError):
                parser.ParseFile(fp)
        finally:
            test_support.unlink(test_support.TESTFN)
23083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
23183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
23283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehclass NamespaceSeparatorTest(unittest.TestCase):
23383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def test_legal(self):
23483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # Tests that make sure we get errors when the namespace_separator value
23583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # is illegal, and that we don't for good values:
23683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        expat.ParserCreate()
23783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        expat.ParserCreate(namespace_separator=None)
23883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        expat.ParserCreate(namespace_separator=' ')
23983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
24083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def test_illegal(self):
24183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        try:
24283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            expat.ParserCreate(namespace_separator=42)
24383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            self.fail()
24483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        except TypeError, e:
24583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            self.assertEqual(str(e),
24683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                'ParserCreate() argument 2 must be string or None, not int')
24783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
24883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        try:
24983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            expat.ParserCreate(namespace_separator='too long')
25083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            self.fail()
25183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        except ValueError, e:
25283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            self.assertEqual(str(e),
25383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                'namespace_separator must be at most one character, omitted, or None')
25483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
25583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def test_zero_length(self):
25683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # ParserCreate() needs to accept a namespace_separator of zero length
25783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # to satisfy the requirements of RDF applications that are required
25883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # to simply glue together the namespace URI and the localname.  Though
25983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # considered a wart of the RDF specifications, it needs to be supported.
26083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        #
26183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # See XML-SIG mailing list thread starting with
26283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
26383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        #
26483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        expat.ParserCreate(namespace_separator='') # too short
26583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
26683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
class InterningTest(unittest.TestCase):
    """Checks that repeated element names reach handlers as one object."""

    def test(self):
        # Test the interning machinery.
        parser = expat.ParserCreate()
        names = []

        def collector(name, *args):
            # Shared by the start and end handlers; only the name is kept.
            names.append(name)

        parser.StartElementHandler = collector
        parser.EndElementHandler = collector
        parser.Parse("<e> <e/> <e></e> </e>", 1)

        # Check the count before indexing so that an empty trace is
        # reported as a test failure instead of an IndexError.
        self.assertEqual(len(names), 6)
        tag = names[0]
        for entry in names:
            # names should hold the same string object over and over.
            self.assertIs(tag, entry)
28283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
28383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
28483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehclass BufferTextTest(unittest.TestCase):
28583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def setUp(self):
28683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.stuff = []
28783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.parser = expat.ParserCreate()
28883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.parser.buffer_text = 1
28983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.parser.CharacterDataHandler = self.CharacterDataHandler
29083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
29183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def check(self, expected, label):
29283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(self.stuff, expected,
29383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                "%s\nstuff    = %r\nexpected = %r"
29483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                % (label, self.stuff, map(unicode, expected)))
29583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
29683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def CharacterDataHandler(self, text):
29783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.stuff.append(text)
29883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
29983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def StartElementHandler(self, name, attrs):
30083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.stuff.append("<%s>" % name)
30183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        bt = attrs.get("buffer-text")
30283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        if bt == "yes":
30383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            self.parser.buffer_text = 1
30483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        elif bt == "no":
30583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            self.parser.buffer_text = 0
30683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
30783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def EndElementHandler(self, name):
30883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.stuff.append("</%s>" % name)
30983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
31083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def CommentHandler(self, data):
31183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.stuff.append("<!--%s-->" % data)
31283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
31383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def setHandlers(self, handlers=[]):
31483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        for name in handlers:
31583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            setattr(self.parser, name, getattr(self, name))
31683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
31783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def test_default_to_disabled(self):
31883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser = expat.ParserCreate()
31983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertFalse(parser.buffer_text)
32083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
32183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def test_buffering_enabled(self):
32283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # Make sure buffering is turned on
32383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertTrue(self.parser.buffer_text)
32483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
32583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(self.stuff, ['123'],
32683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                         "buffered text not properly collapsed")
32783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
32883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def test1(self):
32983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # XXX This test exposes more detail of Expat's text chunking than we
33083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # XXX like, but it tests what we need to concisely.
33183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.setHandlers(["StartElementHandler"])
33283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.parser.Parse("<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1)
33383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(self.stuff,
33483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                         ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
33583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                         "buffering control not reacting as expected")
33683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
33783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def test2(self):
33883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.parser.Parse("<a>1<b/>&lt;2&gt;<c/>&#32;\n&#x20;3</a>", 1)
33983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(self.stuff, ["1<2> \n 3"],
34083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                         "buffered text not properly collapsed")
34183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
34283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def test3(self):
34383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.setHandlers(["StartElementHandler"])
34483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
34583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"],
34683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                          "buffered text not properly split")
34783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
34883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def test4(self):
34983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.setHandlers(["StartElementHandler", "EndElementHandler"])
35083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.parser.CharacterDataHandler = None
35183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
35283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(self.stuff,
35383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                         ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])
35483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
35583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def test5(self):
35683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.setHandlers(["StartElementHandler", "EndElementHandler"])
35783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.parser.Parse("<a>1<b></b>2<c/>3</a>", 1)
35883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(self.stuff,
35983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])
36083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
36183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def test6(self):
36283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.setHandlers(["CommentHandler", "EndElementHandler",
36383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                    "StartElementHandler"])
36483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.parser.Parse("<a>1<b/>2<c></c>345</a> ", 1)
36583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(self.stuff,
36683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
36783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            "buffered text not properly split")
36883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
36983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def test7(self):
37083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.setHandlers(["CommentHandler", "EndElementHandler",
37183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                    "StartElementHandler"])
37283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.parser.Parse("<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1)
37383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(self.stuff,
37483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                         ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
37583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                          "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
37683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh                         "buffered text not properly split")
37783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
37883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
37983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# Test handling of exception from callback:
class HandlerExceptionTest(unittest.TestCase):
    """
    Check that an exception raised inside a handler propagates out of
    Parse() unchanged.
    """

    def StartElementHandler(self, name, attrs):
        # Fail on every start tag, carrying the element name so the test can
        # tell which callback raised.
        raise RuntimeError(name)

    def test(self):
        parser = expat.ParserCreate()
        parser.StartElementHandler = self.StartElementHandler
        try:
            parser.Parse("<a><b><c/></b></a>", 1)
        except RuntimeError as e:
            # The error must come from the outermost element: the first
            # handler call is the one that raises.
            self.assertEqual(e.args[0], 'a',
                             "Expected RuntimeError for element 'a', but"
                             " found %r" % (e.args[0],))
        else:
            self.fail("Parse() did not propagate the handler's RuntimeError")
39483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
39583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
39683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh# Test Current* members:
class PositionTest(unittest.TestCase):
    """
    Check the CurrentByteIndex, CurrentLineNumber and CurrentColumnNumber
    attributes as observed from inside the element handlers.
    """

    def StartElementHandler(self, name, attrs):
        self.check_pos('s')

    def EndElementHandler(self, name):
        self.check_pos('e')

    def check_pos(self, event):
        # Compare the parser's current position against the next entry of
        # self.expected_list; self.upto is the index of that entry.
        pos = (event,
               self.parser.CurrentByteIndex,
               self.parser.CurrentLineNumber,
               self.parser.CurrentColumnNumber)
        self.assertTrue(self.upto < len(self.expected_list),
                        'too many parser events')
        expected = self.expected_list[self.upto]
        # Report the expected value first and the observed one second, in
        # the same order as the wording of the message.
        self.assertEqual(pos, expected,
                'Expected position %s, got position %s' % (expected, pos))
        self.upto += 1

    def test(self):
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.StartElementHandler
        self.parser.EndElementHandler = self.EndElementHandler
        self.upto = 0
        # One (event, byte index, line, column) tuple per handler call.
        self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
                              ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]

        xml = '<a>\n <b>\n  <c/>\n </b>\n</a>'
        self.parser.Parse(xml, 1)
        # check_pos only guards against surplus events; make sure none of
        # the expected ones went missing either.
        self.assertEqual(self.upto, len(self.expected_list),
                         'too few parser events')
42683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
42783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
class sf1296433Test(unittest.TestCase):
    def test_parse_only_xml_data(self):
        # Regression test for http://python.org/sf/1296433
        #
        # The document declares an encoding and carries 1025 characters of
        # text; the handler below raises as soon as it is called.
        xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025)
        # By contrast, this variant without an encoding declaration did not
        # crash:
        #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

        class SpecificException(Exception):
            pass

        def handler(text):
            raise SpecificException

        parser = expat.ParserCreate()
        parser.CharacterDataHandler = handler

        # Assert on the handler's own exception type: accepting any
        # Exception would also let an unrelated failure (for instance an
        # ExpatError) pass unnoticed.
        self.assertRaises(SpecificException, parser.Parse, xml)
44683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
44783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehclass ChardataBufferTest(unittest.TestCase):
44883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    """
44983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    test setting of chardata buffer size
45083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    """
45183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
45283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def test_1025_bytes(self):
45383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(self.small_buffer_test(1025), 2)
45483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
45583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def test_1000_bytes(self):
45683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(self.small_buffer_test(1000), 1)
45783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
45883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def test_wrong_size(self):
45983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser = expat.ParserCreate()
46083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.buffer_text = 1
46183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        def f(size):
46283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            parser.buffer_size = size
46383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
46483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertRaises(TypeError, f, sys.maxint+1)
46583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertRaises(ValueError, f, -1)
46683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertRaises(ValueError, f, 0)
46783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
46883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def test_unchanged_size(self):
46983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        xml1 = ("<?xml version='1.0' encoding='iso8859'?><s>%s" % ('a' * 512))
47083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        xml2 = 'a'*512 + '</s>'
47183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser = expat.ParserCreate()
47283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.CharacterDataHandler = self.counting_handler
47383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.buffer_size = 512
47483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.buffer_text = 1
47583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
47683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # Feed 512 bytes of character data: the handler should be called
47783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # once.
47883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.n = 0
47983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.Parse(xml1)
48083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(self.n, 1)
48183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
48283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # Reassign to buffer_size, but assign the same size.
48383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.buffer_size = parser.buffer_size
48483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(self.n, 1)
48583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
48683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # Try parsing rest of the document
48783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.Parse(xml2)
48883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(self.n, 2)
48983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
49083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
49183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def test_disabling_buffer(self):
49283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        xml1 = "<?xml version='1.0' encoding='iso8859'?><a>%s" % ('a' * 512)
49383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        xml2 = ('b' * 1024)
49483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        xml3 = "%s</a>" % ('c' * 1024)
49583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser = expat.ParserCreate()
49683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.CharacterDataHandler = self.counting_handler
49783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.buffer_text = 1
49883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.buffer_size = 1024
49983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(parser.buffer_size, 1024)
50083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
50183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # Parse one chunk of XML
50283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.n = 0
50383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.Parse(xml1, 0)
50483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(parser.buffer_size, 1024)
50583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(self.n, 1)
50683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
50783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        # Turn off buffering and parse the next chunk.
50883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.buffer_text = 0
50983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertFalse(parser.buffer_text)
51083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(parser.buffer_size, 1024)
51183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        for i in range(10):
51283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            parser.Parse(xml2, 0)
51383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(self.n, 11)
51483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
51583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.buffer_text = 1
51683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertTrue(parser.buffer_text)
51783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(parser.buffer_size, 1024)
51883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.Parse(xml3, 1)
51983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(self.n, 12)
52083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
52183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
52283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
52383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def make_document(self, bytes):
52483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        return ("<?xml version='1.0'?><tag>" + bytes * 'a' + '</tag>')
52583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
52683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def counting_handler(self, text):
52783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.n += 1
52883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
52983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def small_buffer_test(self, buffer_len):
53083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * buffer_len)
53183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser = expat.ParserCreate()
53283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.CharacterDataHandler = self.counting_handler
53383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.buffer_size = 1024
53483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.buffer_text = 1
53583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
53683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.n = 0
53783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.Parse(xml)
53883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        return self.n
53983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
54083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def test_change_size_1(self):
54183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        xml1 = "<?xml version='1.0' encoding='iso8859'?><a><s>%s" % ('a' * 1024)
54283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        xml2 = "aaa</s><s>%s</s></a>" % ('a' * 1025)
54383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser = expat.ParserCreate()
54483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.CharacterDataHandler = self.counting_handler
54583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.buffer_text = 1
54683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.buffer_size = 1024
54783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(parser.buffer_size, 1024)
54883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
54983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.n = 0
55083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.Parse(xml1, 0)
55183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.buffer_size *= 2
55283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(parser.buffer_size, 2048)
55383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.Parse(xml2, 1)
55483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(self.n, 2)
55583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
55683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def test_change_size_2(self):
55783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        xml1 = "<?xml version='1.0' encoding='iso8859'?><a>a<s>%s" % ('a' * 1023)
55883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        xml2 = "aaa</s><s>%s</s></a>" % ('a' * 1025)
55983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser = expat.ParserCreate()
56083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.CharacterDataHandler = self.counting_handler
56183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.buffer_text = 1
56283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.buffer_size = 2048
56383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(parser.buffer_size, 2048)
56483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
56583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.n=0
56683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.Parse(xml1, 0)
56783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.buffer_size //= 2
56883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(parser.buffer_size, 1024)
56983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser.Parse(xml2, 1)
57083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        self.assertEqual(self.n, 4)
57183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
57283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsiehclass MalformedInputText(unittest.TestCase):
57383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def test1(self):
57483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        xml = "\0\r\n"
57583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser = expat.ParserCreate()
57683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        try:
57783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            parser.Parse(xml, True)
57883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            self.fail()
57983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        except expat.ExpatError as e:
58083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            self.assertEqual(str(e), 'unclosed token: line 2, column 0')
58183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
58283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh    def test2(self):
58383760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        xml = "<?xml version\xc2\x85='1.0'?>\r\n"
58483760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        parser = expat.ParserCreate()
58583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        try:
58683760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            parser.Parse(xml, True)
58783760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            self.fail()
58883760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh        except expat.ExpatError as e:
58983760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh            self.assertEqual(str(e), 'XML declaration not well-formed: line 1, column 14')
59083760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
class ForeignDTDTests(unittest.TestCase):
    """
    Checks of how UseForeignDTD affects the ExternalEntityRefHandler calls
    made by expat parser objects.
    """
    def test_use_foreign_dtd(self):
        """
        With UseForeignDTD enabled, parsing a document that has no external
        entity reference must call ExternalEntityRefHandler once, passing
        None as both the public and the system id.
        """
        seen_ids = []
        def record_ids(context, base, system_id, public_id):
            seen_ids.append((public_id, system_id))
            return 1

        document = "<?xml version='1.0'?><element/>"

        explicit = expat.ParserCreate()
        explicit.UseForeignDTD(True)
        explicit.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        explicit.ExternalEntityRefHandler = record_ids
        explicit.Parse(document)
        self.assertEqual(seen_ids, [(None, None)])

        # Calling UseForeignDTD() without an argument must behave exactly
        # like UseForeignDTD(True).
        del seen_ids[:]

        implicit = expat.ParserCreate()
        implicit.UseForeignDTD()
        implicit.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        implicit.ExternalEntityRefHandler = record_ids
        implicit.Parse(document)
        self.assertEqual(seen_ids, [(None, None)])

    def test_ignore_use_foreign_dtd(self):
        """
        With UseForeignDTD enabled, parsing a document that does carry an
        external entity reference must call ExternalEntityRefHandler with
        the public and system ids taken from that document.
        """
        seen_ids = []
        def record_ids(context, base, system_id, public_id):
            seen_ids.append((public_id, system_id))
            return 1

        document = (
            "<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>")

        parser = expat.ParserCreate()
        parser.UseForeignDTD(True)
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.ExternalEntityRefHandler = record_ids
        parser.Parse(document)
        self.assertEqual(seen_ids, [("bar", "baz")])
64183760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
64283760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
def test_main():
    # Pass every test case class of this module to run_unittest in one call.
    test_classes = (
        SetAttributeTest,
        ParseTest,
        NamespaceSeparatorTest,
        InterningTest,
        BufferTextTest,
        HandlerExceptionTest,
        PositionTest,
        sf1296433Test,
        ChardataBufferTest,
        MalformedInputText,
        ForeignDTDTests,
    )
    run_unittest(*test_classes)
65583760d213fb3bec7b4117d266fcfbf6fe2ba14abAndrew Hsieh
# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()
658