1import io 2import unittest 3import xml.sax 4 5from xml.sax.xmlreader import AttributesImpl 6from xml.dom import pulldom 7 8from test.support import findfile 9 10 11tstfile = findfile("test.xml", subdir="xmltestdata") 12 13# A handy XML snippet, containing attributes, a namespace prefix, and a 14# self-closing tag: 15SMALL_SAMPLE = """<?xml version="1.0"?> 16<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books"> 17<!-- A comment --> 18<title>Introduction to XSL</title> 19<hr/> 20<p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p> 21</html>""" 22 23 24class PullDOMTestCase(unittest.TestCase): 25 26 def test_parse(self): 27 """Minimal test of DOMEventStream.parse()""" 28 29 # This just tests that parsing from a stream works. Actual parser 30 # semantics are tested using parseString with a more focused XML 31 # fragment. 32 33 # Test with a filename: 34 handler = pulldom.parse(tstfile) 35 self.addCleanup(handler.stream.close) 36 list(handler) 37 38 # Test with a file object: 39 with open(tstfile, "rb") as fin: 40 list(pulldom.parse(fin)) 41 42 def test_parse_semantics(self): 43 """Test DOMEventStream parsing semantics.""" 44 45 items = pulldom.parseString(SMALL_SAMPLE) 46 evt, node = next(items) 47 # Just check the node is a Document: 48 self.assertTrue(hasattr(node, "createElement")) 49 self.assertEqual(pulldom.START_DOCUMENT, evt) 50 evt, node = next(items) 51 self.assertEqual(pulldom.START_ELEMENT, evt) 52 self.assertEqual("html", node.tagName) 53 self.assertEqual(2, len(node.attributes)) 54 self.assertEqual(node.attributes.getNamedItem("xmlns:xdc").value, 55 "http://www.xml.com/books") 56 evt, node = next(items) 57 self.assertEqual(pulldom.CHARACTERS, evt) # Line break 58 evt, node = next(items) 59 # XXX - A comment should be reported here! 60 # self.assertEqual(pulldom.COMMENT, evt) 61 # Line break after swallowed comment: 62 self.assertEqual(pulldom.CHARACTERS, evt) 63 evt, node = next(items) 64 self.assertEqual("title", node.tagName) 65 title_node = node 66 evt, node = next(items) 67 self.assertEqual(pulldom.CHARACTERS, evt) 68 self.assertEqual("Introduction to XSL", node.data) 69 evt, node = next(items) 70 self.assertEqual(pulldom.END_ELEMENT, evt) 71 self.assertEqual("title", node.tagName) 72 self.assertTrue(title_node is node) 73 evt, node = next(items) 74 self.assertEqual(pulldom.CHARACTERS, evt) 75 evt, node = next(items) 76 self.assertEqual(pulldom.START_ELEMENT, evt) 77 self.assertEqual("hr", node.tagName) 78 evt, node = next(items) 79 self.assertEqual(pulldom.END_ELEMENT, evt) 80 self.assertEqual("hr", node.tagName) 81 evt, node = next(items) 82 self.assertEqual(pulldom.CHARACTERS, evt) 83 evt, node = next(items) 84 self.assertEqual(pulldom.START_ELEMENT, evt) 85 self.assertEqual("p", node.tagName) 86 evt, node = next(items) 87 self.assertEqual(pulldom.START_ELEMENT, evt) 88 self.assertEqual("xdc:author", node.tagName) 89 evt, node = next(items) 90 self.assertEqual(pulldom.CHARACTERS, evt) 91 evt, node = next(items) 92 self.assertEqual(pulldom.END_ELEMENT, evt) 93 self.assertEqual("xdc:author", node.tagName) 94 evt, node = next(items) 95 self.assertEqual(pulldom.END_ELEMENT, evt) 96 evt, node = next(items) 97 self.assertEqual(pulldom.CHARACTERS, evt) 98 evt, node = next(items) 99 self.assertEqual(pulldom.END_ELEMENT, evt) 100 # XXX No END_DOCUMENT item is ever obtained: 101 #evt, node = next(items) 102 #self.assertEqual(pulldom.END_DOCUMENT, evt) 103 104 def test_expandItem(self): 105 """Ensure expandItem works as expected.""" 106 items = pulldom.parseString(SMALL_SAMPLE) 107 # Loop through the nodes until we get to a "title" start tag: 108 for evt, item in items: 109 if evt == pulldom.START_ELEMENT and item.tagName == "title": 110 items.expandNode(item) 111 self.assertEqual(1, len(item.childNodes)) 112 break 113 else: 114 self.fail("No \"title\" element detected in SMALL_SAMPLE!") 115 # Loop until we get to the next start-element: 116 for evt, node in items: 117 if evt == pulldom.START_ELEMENT: 118 break 119 self.assertEqual("hr", node.tagName, 120 "expandNode did not leave DOMEventStream in the correct state.") 121 # Attempt to expand a standalone element: 122 items.expandNode(node) 123 self.assertEqual(next(items)[0], pulldom.CHARACTERS) 124 evt, node = next(items) 125 self.assertEqual(node.tagName, "p") 126 items.expandNode(node) 127 next(items) # Skip character data 128 evt, node = next(items) 129 self.assertEqual(node.tagName, "html") 130 with self.assertRaises(StopIteration): 131 next(items) 132 items.clear() 133 self.assertIsNone(items.parser) 134 self.assertIsNone(items.stream) 135 136 @unittest.expectedFailure 137 def test_comment(self): 138 """PullDOM does not receive "comment" events.""" 139 items = pulldom.parseString(SMALL_SAMPLE) 140 for evt, _ in items: 141 if evt == pulldom.COMMENT: 142 break 143 else: 144 self.fail("No comment was encountered") 145 146 @unittest.expectedFailure 147 def test_end_document(self): 148 """PullDOM does not receive "end-document" events.""" 149 items = pulldom.parseString(SMALL_SAMPLE) 150 # Read all of the nodes up to and including </html>: 151 for evt, node in items: 152 if evt == pulldom.END_ELEMENT and node.tagName == "html": 153 break 154 try: 155 # Assert that the next node is END_DOCUMENT: 156 evt, node = next(items) 157 self.assertEqual(pulldom.END_DOCUMENT, evt) 158 except StopIteration: 159 self.fail( 160 "Ran out of events, but should have received END_DOCUMENT") 161 162 163class ThoroughTestCase(unittest.TestCase): 164 """Test the hard-to-reach parts of pulldom.""" 165 166 def test_thorough_parse(self): 167 """Test some of the hard-to-reach parts of PullDOM.""" 168 self._test_thorough(pulldom.parse(None, parser=SAXExerciser())) 169 170 @unittest.expectedFailure 171 def test_sax2dom_fail(self): 172 """SAX2DOM can"t handle a PI before the root element.""" 173 pd = SAX2DOMTestHelper(None, SAXExerciser(), 12) 174 self._test_thorough(pd) 175 176 def test_thorough_sax2dom(self): 177 """Test some of the hard-to-reach parts of SAX2DOM.""" 178 pd = SAX2DOMTestHelper(None, SAX2DOMExerciser(), 12) 179 self._test_thorough(pd, False) 180 181 def _test_thorough(self, pd, before_root=True): 182 """Test some of the hard-to-reach parts of the parser, using a mock 183 parser.""" 184 185 evt, node = next(pd) 186 self.assertEqual(pulldom.START_DOCUMENT, evt) 187 # Just check the node is a Document: 188 self.assertTrue(hasattr(node, "createElement")) 189 190 if before_root: 191 evt, node = next(pd) 192 self.assertEqual(pulldom.COMMENT, evt) 193 self.assertEqual("a comment", node.data) 194 evt, node = next(pd) 195 self.assertEqual(pulldom.PROCESSING_INSTRUCTION, evt) 196 self.assertEqual("target", node.target) 197 self.assertEqual("data", node.data) 198 199 evt, node = next(pd) 200 self.assertEqual(pulldom.START_ELEMENT, evt) 201 self.assertEqual("html", node.tagName) 202 203 evt, node = next(pd) 204 self.assertEqual(pulldom.COMMENT, evt) 205 self.assertEqual("a comment", node.data) 206 evt, node = next(pd) 207 self.assertEqual(pulldom.PROCESSING_INSTRUCTION, evt) 208 self.assertEqual("target", node.target) 209 self.assertEqual("data", node.data) 210 211 evt, node = next(pd) 212 self.assertEqual(pulldom.START_ELEMENT, evt) 213 self.assertEqual("p", node.tagName) 214 215 evt, node = next(pd) 216 self.assertEqual(pulldom.CHARACTERS, evt) 217 self.assertEqual("text", node.data) 218 evt, node = next(pd) 219 self.assertEqual(pulldom.END_ELEMENT, evt) 220 self.assertEqual("p", node.tagName) 221 evt, node = next(pd) 222 self.assertEqual(pulldom.END_ELEMENT, evt) 223 self.assertEqual("html", node.tagName) 224 evt, node = next(pd) 225 self.assertEqual(pulldom.END_DOCUMENT, evt) 226 227 228class SAXExerciser(object): 229 """A fake sax parser that calls some of the harder-to-reach sax methods to 230 ensure it emits the correct events""" 231 232 def setContentHandler(self, handler): 233 self._handler = handler 234 235 def parse(self, _): 236 h = self._handler 237 h.startDocument() 238 239 # The next two items ensure that items preceding the first 240 # start_element are properly stored and emitted: 241 h.comment("a comment") 242 h.processingInstruction("target", "data") 243 244 h.startElement("html", AttributesImpl({})) 245 246 h.comment("a comment") 247 h.processingInstruction("target", "data") 248 249 h.startElement("p", AttributesImpl({"class": "paraclass"})) 250 h.characters("text") 251 h.endElement("p") 252 h.endElement("html") 253 h.endDocument() 254 255 def stub(self, *args, **kwargs): 256 """Stub method. Does nothing.""" 257 pass 258 setProperty = stub 259 setFeature = stub 260 261 262class SAX2DOMExerciser(SAXExerciser): 263 """The same as SAXExerciser, but without the processing instruction and 264 comment before the root element, because S2D can"t handle it""" 265 266 def parse(self, _): 267 h = self._handler 268 h.startDocument() 269 h.startElement("html", AttributesImpl({})) 270 h.comment("a comment") 271 h.processingInstruction("target", "data") 272 h.startElement("p", AttributesImpl({"class": "paraclass"})) 273 h.characters("text") 274 h.endElement("p") 275 h.endElement("html") 276 h.endDocument() 277 278 279class SAX2DOMTestHelper(pulldom.DOMEventStream): 280 """Allows us to drive SAX2DOM from a DOMEventStream.""" 281 282 def reset(self): 283 self.pulldom = pulldom.SAX2DOM() 284 # This content handler relies on namespace support 285 self.parser.setFeature(xml.sax.handler.feature_namespaces, 1) 286 self.parser.setContentHandler(self.pulldom) 287 288 289class SAX2DOMTestCase(unittest.TestCase): 290 291 def confirm(self, test, testname="Test"): 292 self.assertTrue(test, testname) 293 294 def test_basic(self): 295 """Ensure SAX2DOM can parse from a stream.""" 296 with io.StringIO(SMALL_SAMPLE) as fin: 297 sd = SAX2DOMTestHelper(fin, xml.sax.make_parser(), 298 len(SMALL_SAMPLE)) 299 for evt, node in sd: 300 if evt == pulldom.START_ELEMENT and node.tagName == "html": 301 break 302 # Because the buffer is the same length as the XML, all the 303 # nodes should have been parsed and added: 304 self.assertGreater(len(node.childNodes), 0) 305 306 def testSAX2DOM(self): 307 """Ensure SAX2DOM expands nodes as expected.""" 308 sax2dom = pulldom.SAX2DOM() 309 sax2dom.startDocument() 310 sax2dom.startElement("doc", {}) 311 sax2dom.characters("text") 312 sax2dom.startElement("subelm", {}) 313 sax2dom.characters("text") 314 sax2dom.endElement("subelm") 315 sax2dom.characters("text") 316 sax2dom.endElement("doc") 317 sax2dom.endDocument() 318 319 doc = sax2dom.document 320 root = doc.documentElement 321 (text1, elm1, text2) = root.childNodes 322 text3 = elm1.childNodes[0] 323 324 self.assertIsNone(text1.previousSibling) 325 self.assertIs(text1.nextSibling, elm1) 326 self.assertIs(elm1.previousSibling, text1) 327 self.assertIs(elm1.nextSibling, text2) 328 self.assertIs(text2.previousSibling, elm1) 329 self.assertIsNone(text2.nextSibling) 330 self.assertIsNone(text3.previousSibling) 331 self.assertIsNone(text3.nextSibling) 332 333 self.assertIs(root.parentNode, doc) 334 self.assertIs(text1.parentNode, root) 335 self.assertIs(elm1.parentNode, root) 336 self.assertIs(text2.parentNode, root) 337 self.assertIs(text3.parentNode, elm1) 338 doc.unlink() 339 340 341if __name__ == "__main__": 342 unittest.main() 343