1import io
2import unittest
3import xml.sax
4
5from xml.sax.xmlreader import AttributesImpl
6from xml.dom import pulldom
7
8from test.support import findfile
9
10
# Path of the "test.xml" fixture, looked up through findfile() with the
# "xmltestdata" subdirectory; used by the stream-parsing test.
tstfile = findfile("test.xml", subdir="xmltestdata")
12
# Shared XML fixture for the parser tests. It contains attributes, a
# namespace prefix, a comment and a self-closing tag. The lines are joined
# with "\n" and there is no trailing newline.
SMALL_SAMPLE = "\n".join([
    '<?xml version="1.0"?>',
    '<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books">',
    '<!-- A comment -->',
    '<title>Introduction to XSL</title>',
    '<hr/>',
    '<p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p>',
    '</html>',
])
22
23
class PullDOMTestCase(unittest.TestCase):
    """Tests for pulldom.parse(), pulldom.parseString() and expandNode()."""

    def _expect(self, items, event, tag=None):
        """Pull the next item and check its event type and, optionally, tag.

        Returns the node so that the caller can make further assertions.
        """
        evt, node = next(items)
        self.assertEqual(event, evt)
        if tag is not None:
            self.assertEqual(tag, node.tagName)
        return node

    def test_parse(self):
        """Minimal test of DOMEventStream.parse()"""

        # This just tests that parsing from a stream works. Actual parser
        # semantics are tested using parseString with a more focused XML
        # fragment.

        # Test with a filename:
        handler = pulldom.parse(tstfile)
        # pulldom opened the file itself, so make sure it gets closed:
        self.addCleanup(handler.stream.close)
        list(handler)

        # Test with a file object:
        with open(tstfile, "rb") as fin:
            list(pulldom.parse(fin))

    def test_parse_semantics(self):
        """Test DOMEventStream parsing semantics."""

        items = pulldom.parseString(SMALL_SAMPLE)

        document = self._expect(items, pulldom.START_DOCUMENT)
        # Just check the node is a Document:
        self.assertTrue(hasattr(document, "createElement"))

        html = self._expect(items, pulldom.START_ELEMENT, "html")
        self.assertEqual(2, len(html.attributes))
        self.assertEqual("http://www.xml.com/books",
                         html.attributes.getNamedItem("xmlns:xdc").value)

        self._expect(items, pulldom.CHARACTERS)  # Line break
        # XXX - A pulldom.COMMENT event should be reported here, but it is
        # swallowed (see test_comment). Only the line break that follows
        # the comment is seen:
        self._expect(items, pulldom.CHARACTERS)

        title_node = self._expect(items, pulldom.START_ELEMENT, "title")
        text = self._expect(items, pulldom.CHARACTERS)
        self.assertEqual("Introduction to XSL", text.data)
        closing = self._expect(items, pulldom.END_ELEMENT, "title")
        # The end event must hand back the very same node object:
        self.assertIs(title_node, closing)

        self._expect(items, pulldom.CHARACTERS)
        self._expect(items, pulldom.START_ELEMENT, "hr")
        self._expect(items, pulldom.END_ELEMENT, "hr")
        self._expect(items, pulldom.CHARACTERS)
        self._expect(items, pulldom.START_ELEMENT, "p")
        self._expect(items, pulldom.START_ELEMENT, "xdc:author")
        self._expect(items, pulldom.CHARACTERS)
        self._expect(items, pulldom.END_ELEMENT, "xdc:author")
        self._expect(items, pulldom.END_ELEMENT, "p")
        self._expect(items, pulldom.CHARACTERS)
        self._expect(items, pulldom.END_ELEMENT, "html")
        # XXX No END_DOCUMENT item is ever obtained (see test_end_document).

    def test_expandItem(self):
        """Ensure expandItem works as expected."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Loop through the nodes until we get to a "title" start tag:
        for evt, item in items:
            if evt == pulldom.START_ELEMENT and item.tagName == "title":
                items.expandNode(item)
                self.assertEqual(1, len(item.childNodes))
                break
        else:
            self.fail("No \"title\" element detected in SMALL_SAMPLE!")
        # Loop until we get to the next start-element:
        for evt, node in items:
            if evt == pulldom.START_ELEMENT:
                break
        else:
            # Without this, an exhausted stream would leave a stale node
            # behind and produce a misleading failure below.
            self.fail("No start-element found after the expanded "
                      "\"title\" element!")
        self.assertEqual("hr", node.tagName,
            "expandNode did not leave DOMEventStream in the correct state.")
        # Attempt to expand a standalone element:
        items.expandNode(node)
        self._expect(items, pulldom.CHARACTERS)
        node = self._expect(items, pulldom.START_ELEMENT, "p")
        items.expandNode(node)
        next(items)  # Skip character data
        self._expect(items, pulldom.END_ELEMENT, "html")
        with self.assertRaises(StopIteration):
            next(items)
        # clear() is expected to drop the parser and stream references:
        items.clear()
        self.assertIsNone(items.parser)
        self.assertIsNone(items.stream)

    @unittest.expectedFailure
    def test_comment(self):
        """PullDOM does not receive "comment" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        for evt, _ in items:
            if evt == pulldom.COMMENT:
                break
        else:
            self.fail("No comment was encountered")

    @unittest.expectedFailure
    def test_end_document(self):
        """PullDOM does not receive "end-document" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Read all of the nodes up to and including </html>:
        for evt, node in items:
            if evt == pulldom.END_ELEMENT and node.tagName == "html":
                break
        # Only the next() call can raise StopIteration, so keep the try
        # body down to that one statement:
        try:
            evt, node = next(items)
        except StopIteration:
            self.fail(
                "Ran out of events, but should have received END_DOCUMENT")
        # Assert that the next node is END_DOCUMENT:
        self.assertEqual(pulldom.END_DOCUMENT, evt)
161
162
class ThoroughTestCase(unittest.TestCase):
    """Exercise pulldom code paths by driving it with mock parsers."""

    def test_thorough_parse(self):
        """Run the full event walk over PullDOM fed by SAXExerciser."""
        stream = pulldom.parse(None, parser=SAXExerciser())
        self._test_thorough(stream)

    @unittest.expectedFailure
    def test_sax2dom_fail(self):
        """SAX2DOM can't handle a PI before the root element."""
        stream = SAX2DOMTestHelper(None, SAXExerciser(), 12)
        self._test_thorough(stream)

    def test_thorough_sax2dom(self):
        """Run the event walk over SAX2DOM, skipping the pre-root items."""
        stream = SAX2DOMTestHelper(None, SAX2DOMExerciser(), 12)
        self._test_thorough(stream, False)

    def _check_comment_then_pi(self, pd):
        """Expect a comment item followed by a processing instruction."""
        event, item = next(pd)
        self.assertEqual(pulldom.COMMENT, event)
        self.assertEqual("a comment", item.data)
        event, item = next(pd)
        self.assertEqual(pulldom.PROCESSING_INSTRUCTION, event)
        self.assertEqual("target", item.target)
        self.assertEqual("data", item.data)

    def _check_element(self, pd, expected_event, tag):
        """Expect an element item of the given event type and tag name."""
        event, item = next(pd)
        self.assertEqual(expected_event, event)
        self.assertEqual(tag, item.tagName)

    def _test_thorough(self, pd, before_root=True):
        """Walk the event stream of a mock parser and check every item.

        ``before_root`` says whether a comment and a processing instruction
        are expected ahead of the root element.
        """
        event, item = next(pd)
        self.assertEqual(pulldom.START_DOCUMENT, event)
        # Just check the node is a Document:
        self.assertTrue(hasattr(item, "createElement"))

        if before_root:
            self._check_comment_then_pi(pd)

        self._check_element(pd, pulldom.START_ELEMENT, "html")
        self._check_comment_then_pi(pd)
        self._check_element(pd, pulldom.START_ELEMENT, "p")

        event, item = next(pd)
        self.assertEqual(pulldom.CHARACTERS, event)
        self.assertEqual("text", item.data)

        self._check_element(pd, pulldom.END_ELEMENT, "p")
        self._check_element(pd, pulldom.END_ELEMENT, "html")

        event, item = next(pd)
        self.assertEqual(pulldom.END_DOCUMENT, event)
226
227
228class SAXExerciser(object):
229    """A fake sax parser that calls some of the harder-to-reach sax methods to
230    ensure it emits the correct events"""
231
232    def setContentHandler(self, handler):
233        self._handler = handler
234
235    def parse(self, _):
236        h = self._handler
237        h.startDocument()
238
239        # The next two items ensure that items preceding the first
240        # start_element are properly stored and emitted:
241        h.comment("a comment")
242        h.processingInstruction("target", "data")
243
244        h.startElement("html", AttributesImpl({}))
245
246        h.comment("a comment")
247        h.processingInstruction("target", "data")
248
249        h.startElement("p", AttributesImpl({"class": "paraclass"}))
250        h.characters("text")
251        h.endElement("p")
252        h.endElement("html")
253        h.endDocument()
254
255    def stub(self, *args, **kwargs):
256        """Stub method. Does nothing."""
257        pass
258    setProperty = stub
259    setFeature = stub
260
261
class SAX2DOMExerciser(SAXExerciser):
    """SAXExerciser variant that omits the comment and processing
    instruction ahead of the root element, because SAX2DOM can't handle
    them there."""

    def parse(self, _):
        """Replay the canned document, starting directly with the root
        element; the source argument is ignored."""
        sink = self._handler
        no_attrs = AttributesImpl({})
        para_attrs = AttributesImpl({"class": "paraclass"})

        sink.startDocument()
        sink.startElement("html", no_attrs)

        # Comment and processing instruction inside the root element:
        sink.comment("a comment")
        sink.processingInstruction("target", "data")

        sink.startElement("p", para_attrs)
        sink.characters("text")
        sink.endElement("p")
        sink.endElement("html")
        sink.endDocument()
277
278
class SAX2DOMTestHelper(pulldom.DOMEventStream):
    """DOMEventStream whose content handler is a SAX2DOM instance, so that
    SAX2DOM can be driven through the event-stream interface."""

    def reset(self):
        """Install a fresh SAX2DOM handler on the parser."""
        builder = pulldom.SAX2DOM()
        self.pulldom = builder
        parser = self.parser
        # This content handler relies on namespace support
        parser.setFeature(xml.sax.handler.feature_namespaces, 1)
        parser.setContentHandler(builder)
287
288
class SAX2DOMTestCase(unittest.TestCase):
    """Tests for the tree that pulldom.SAX2DOM builds while parsing."""

    def confirm(self, test, testname="Test"):
        """Assert that ``test`` is true, using ``testname`` as the message."""
        self.assertTrue(test, testname)

    def test_basic(self):
        """Ensure SAX2DOM can parse from a stream."""
        with io.StringIO(SMALL_SAMPLE) as fin:
            sd = SAX2DOMTestHelper(fin, xml.sax.make_parser(),
                                   len(SMALL_SAMPLE))
            for evt, node in sd:
                if evt == pulldom.START_ELEMENT and node.tagName == "html":
                    break
            else:
                # Without this branch the assertion below would inspect
                # whatever node happened to come last in the stream.
                self.fail("No \"html\" start element found in SMALL_SAMPLE")
            # Because the buffer is the same length as the XML, all the
            # nodes should have been parsed and added:
            self.assertGreater(len(node.childNodes), 0)

    def testSAX2DOM(self):
        """Ensure SAX2DOM expands nodes as expected."""
        # Drive the handler by hand: <doc>text<subelm>text</subelm>text</doc>
        sax2dom = pulldom.SAX2DOM()
        sax2dom.startDocument()
        sax2dom.startElement("doc", {})
        sax2dom.characters("text")
        sax2dom.startElement("subelm", {})
        sax2dom.characters("text")
        sax2dom.endElement("subelm")
        sax2dom.characters("text")
        sax2dom.endElement("doc")
        sax2dom.endDocument()

        doc = sax2dom.document
        root = doc.documentElement
        (text1, elm1, text2) = root.childNodes
        text3 = elm1.childNodes[0]

        # Sibling links among the root's children and the nested text node:
        self.assertIsNone(text1.previousSibling)
        self.assertIs(text1.nextSibling, elm1)
        self.assertIs(elm1.previousSibling, text1)
        self.assertIs(elm1.nextSibling, text2)
        self.assertIs(text2.previousSibling, elm1)
        self.assertIsNone(text2.nextSibling)
        self.assertIsNone(text3.previousSibling)
        self.assertIsNone(text3.nextSibling)

        # Parent links:
        self.assertIs(root.parentNode, doc)
        self.assertIs(text1.parentNode, root)
        self.assertIs(elm1.parentNode, root)
        self.assertIs(text2.parentNode, root)
        self.assertIs(text3.parentNode, elm1)
        doc.unlink()
339
340
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
343