"""Facility to use the Expat parser to load a minidom instance
from a string or file.

This avoids all the overhead of SAX and pulldom to gain performance.
"""

# Warning!
#
# This module is tightly bound to the implementation details of the
# minidom DOM and can't be used with other DOM implementations.  This
# is due, in part, to a lack of appropriate methods in the DOM (there is
# no way to create Entity and Notation nodes via the DOM Level 2
# interface), and for performance.  The latter is the cause of some fairly
# cryptic code.
#
# Performance hacks:
#
#   -  .character_data_handler() has an extra case in which continuing
#      data is appended to an existing Text node; this can be a
#      speedup since pyexpat can break up character data into multiple
#      callbacks even though we set the buffer_text attribute on the
#      parser.  This also gives us the advantage that we don't need a
#      separate normalization pass.
#
#   -  Determining that a node exists is done using an identity comparison
#      with None rather than a truth test; this avoids searching for and
#      calling any methods on the node object if it exists.  (A rather
#      nice speedup is achieved this way as well!)

from xml.dom import xmlbuilder, minidom, Node
from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE
from xml.parsers import expat
from xml.dom.minidom import _append_child, _set_attribute_node
from xml.dom.NodeFilter import NodeFilter

from xml.dom.minicompat import *

TEXT_NODE = Node.TEXT_NODE
CDATA_SECTION_NODE = Node.CDATA_SECTION_NODE
DOCUMENT_NODE = Node.DOCUMENT_NODE

FILTER_ACCEPT = xmlbuilder.DOMBuilderFilter.FILTER_ACCEPT
FILTER_REJECT = xmlbuilder.DOMBuilderFilter.FILTER_REJECT
FILTER_SKIP = xmlbuilder.DOMBuilderFilter.FILTER_SKIP
FILTER_INTERRUPT = xmlbuilder.DOMBuilderFilter.FILTER_INTERRUPT

theDOMImplementation = minidom.getDOMImplementation()

# Expat typename -> TypeInfo
_typeinfo_map = {
    "CDATA":    minidom.TypeInfo(None, "cdata"),
    "ENUM":     minidom.TypeInfo(None, "enumeration"),
    "ENTITY":   minidom.TypeInfo(None, "entity"),
    "ENTITIES": minidom.TypeInfo(None, "entities"),
    "ID":       minidom.TypeInfo(None, "id"),
    "IDREF":    minidom.TypeInfo(None, "idref"),
    "IDREFS":   minidom.TypeInfo(None, "idrefs"),
    "NMTOKEN":  minidom.TypeInfo(None, "nmtoken"),
    "NMTOKENS": minidom.TypeInfo(None, "nmtokens"),
    }

class ElementInfo(object):
    """Per-element declaration data: tag name, content model and a list
    of attribute-declaration records.

    Each record in ``_attr_info`` is indexed positionally by the methods
    below: index 1 is compared against the attribute name and index -2
    is read as the declared attribute type string.
    """

    __slots__ = '_attr_info', '_model', 'tagName'

    def __init__(self, tagName, model=None):
        self.tagName = tagName
        self._attr_info = []
        self._model = model

    def __getstate__(self):
        """Return the slot values as a tuple (slots have no __dict__)."""
        return self._attr_info, self._model, self.tagName

    def __setstate__(self, state):
        """Restore the slot values from the tuple made by __getstate__."""
        self._attr_info, self._model, self.tagName = state

    def getAttributeType(self, aname):
        """Return the TypeInfo for attribute *aname*, or minidom._no_type
        when no record for that name exists."""
        for info in self._attr_info:
            if info[1] == aname:
                t = info[-2]
                # A type string starting with "(" is an enumeration.
                if t[0] == "(":
                    return _typeinfo_map["ENUM"]
                else:
                    return _typeinfo_map[t]
        return minidom._no_type

    def getAttributeTypeNS(self, namespaceURI, localName):
        """Always return minidom._no_type."""
        return minidom._no_type

    def isElementContent(self):
        """Return True when a content model is present and it is neither
        ANY nor MIXED; return False when there is no model."""
        if self._model:
            content_type = self._model[0]
            return content_type not in (expat.model.XML_CTYPE_ANY,
                                        expat.model.XML_CTYPE_MIXED)
        else:
            return False

    def isEmpty(self):
        """Return True when the content model is EMPTY; False when it is
        anything else or when there is no model."""
        if self._model:
            return self._model[0] == expat.model.XML_CTYPE_EMPTY
        else:
            return False

    def isId(self, aname):
        """Return True when attribute *aname* has declared type "ID"."""
        for info in self._attr_info:
            if info[1] == aname:
                return info[-2] == "ID"
        return False

    def isIdNS(self, euri, ename, auri, aname):
        # not sure this is meaningful
        return self.isId((auri, aname))

def _intern(builder, s):
    """Return the builder's interned copy of string *s*."""
    return builder._intern_setdefault(s, s)

def _parse_ns_name(builder, name):
    """Split a space-separated expanded name into
    (uri, localname, prefix, qname), interning each string.

    A three-part name is "uri localname prefix"; a two-part name is
    "uri localname" and gets EMPTY_PREFIX with qname equal to localname.
    """
    assert ' ' in name
    parts = name.split(' ')
    intern = builder._intern_setdefault
    if len(parts) == 3:
        uri, localname, prefix = parts
        prefix = intern(prefix, prefix)
        qname = "%s:%s" % (prefix, localname)
        qname = intern(qname, qname)
        localname = intern(localname, localname)
    else:
        uri, localname = parts
        prefix = EMPTY_PREFIX
        qname = localname = intern(localname, localname)
    return intern(uri, uri), localname, prefix, qname


class ExpatBuilder:
    """Document builder that uses Expat to build a ParsedXML.DOM document
    instance."""

    def __init__(self, options=None):
        """Set up the builder; *options* defaults to xmlbuilder.Options()."""
        if options is None:
            options = xmlbuilder.Options()
        self._options = options
        if self._options.filter is not None:
            self._filter = FilterVisibilityController(self._options.filter)
        else:
            self._filter = None
            # This *really* doesn't do anything in this case, so
            # override it with something fast & minimal.
            # (The builtin id() serves as a cheap one-argument callable
            # whose result is ignored.)
            self._finish_start_element = id
        self._parser = None
        self.reset()

    def createParser(self):
        """Create a new parser object."""
        return expat.ParserCreate()

    def getParser(self):
        """Return the parser object, creating a new one if needed."""
        if not self._parser:
            self._parser = self.createParser()
            self._intern_setdefault = self._parser.intern.setdefault
            self._parser.buffer_text = True
            self._parser.ordered_attributes = True
            self._parser.specified_attributes = True
            self.install(self._parser)
        return self._parser

    def reset(self):
        """Free all data structures used during DOM construction."""
        self.document = theDOMImplementation.createDocument(
            EMPTY_NAMESPACE, None, None)
        self.curNode = self.document
        self._elem_info = self.document._elem_info
        self._cdata = False

    def install(self, parser):
        """Install the callbacks needed to build the DOM into the parser."""
        # This creates circular references!
        parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler
        parser.StartElementHandler = self.first_element_handler
        parser.EndElementHandler = self.end_element_handler
        parser.ProcessingInstructionHandler = self.pi_handler
        if self._options.entities:
            parser.EntityDeclHandler = self.entity_decl_handler
        parser.NotationDeclHandler = self.notation_decl_handler
        if self._options.comments:
            parser.CommentHandler = self.comment_handler
        if self._options.cdata_sections:
            parser.StartCdataSectionHandler = self.start_cdata_section_handler
            parser.EndCdataSectionHandler = self.end_cdata_section_handler
            parser.CharacterDataHandler = self.character_data_handler_cdata
        else:
            parser.CharacterDataHandler = self.character_data_handler
        parser.ExternalEntityRefHandler = self.external_entity_ref_handler
        parser.XmlDeclHandler = self.xml_decl_handler
        parser.ElementDeclHandler = self.element_decl_handler
        parser.AttlistDeclHandler = self.attlist_decl_handler

    def parseFile(self, file):
        """Parse a document from a file object, returning the document
        node."""
        parser = self.getParser()
        first_buffer = True
        try:
            while True:
                buffer = file.read(16*1024)
                if not buffer:
                    break
                parser.Parse(buffer, 0)
                # Only the first chunk is handed to _setup_subset(), and
                # only once the document element has been seen in it.
                if first_buffer and self.document.documentElement:
                    self._setup_subset(buffer)
                first_buffer = False
            parser.Parse("", True)
        except ParseEscape:
            pass
        doc = self.document
        self.reset()
        self._parser = None
        return doc

    def parseString(self, string):
        """Parse a document from a string, returning the document node."""
        parser = self.getParser()
        try:
            parser.Parse(string, True)
            self._setup_subset(string)
        except ParseEscape:
            pass
        doc = self.document
        self.reset()
        self._parser = None
        return doc

    def _setup_subset(self, buffer):
        """Load the internal subset if there might be one."""
        if self.document.doctype:
            extractor = InternalSubsetExtractor()
            extractor.parseString(buffer)
            subset = extractor.getSubset()
            self.document.doctype.internalSubset = subset

    def start_doctype_decl_handler(self, doctypeName, systemId, publicId,
                                   has_internal_subset):
        """Create the DocumentType node and attach it to the document.

        If the filter rejects the doctype it is detached again and the
        entity/notation declaration handlers are switched off.  While an
        internal subset is being read, comment and PI handlers are
        disabled; end_doctype_decl_handler() restores them.
        """
        doctype = self.document.implementation.createDocumentType(
            doctypeName, publicId, systemId)
        doctype.ownerDocument = self.document
        _append_child(self.document, doctype)
        self.document.doctype = doctype
        if self._filter and self._filter.acceptNode(doctype) == FILTER_REJECT:
            self.document.doctype = None
            del self.document.childNodes[-1]
            doctype = None
            self._parser.EntityDeclHandler = None
            self._parser.NotationDeclHandler = None
        if has_internal_subset:
            if doctype is not None:
                doctype.entities._seq = []
                doctype.notations._seq = []
            self._parser.CommentHandler = None
            self._parser.ProcessingInstructionHandler = None
            self._parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler

    def end_doctype_decl_handler(self):
        """Re-enable the comment and PI handlers after the doctype."""
        if self._options.comments:
            self._parser.CommentHandler = self.comment_handler
        self._parser.ProcessingInstructionHandler = self.pi_handler
        if not (self._elem_info or self._filter):
            # Nothing for _finish_end_element() to do; use a cheap no-op.
            self._finish_end_element = id

    def pi_handler(self, target, data):
        """Append a ProcessingInstruction node unless the filter rejects it."""
        node = self.document.createProcessingInstruction(target, data)
        _append_child(self.curNode, node)
        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
            self.curNode.removeChild(node)

    def character_data_handler_cdata(self, data):
        """Character-data callback used when CDATA sections are kept.

        Inside a CDATA section the data goes into a CDATASection node
        (extending the one just created if the section continues);
        otherwise it is merged into a trailing Text node or put in a
        new Text node.
        """
        childNodes = self.curNode.childNodes
        if self._cdata:
            if (  self._cdata_continue
                  and childNodes[-1].nodeType == CDATA_SECTION_NODE):
                childNodes[-1].appendData(data)
                return
            node = self.document.createCDATASection(data)
            self._cdata_continue = True
        elif childNodes and childNodes[-1].nodeType == TEXT_NODE:
            node = childNodes[-1]
            value = node.data + data
            # Write through __dict__ directly (performance hack).
            d = node.__dict__
            d['data'] = d['nodeValue'] = value
            return
        else:
            node = minidom.Text()
            d = node.__dict__
            d['data'] = d['nodeValue'] = data
            d['ownerDocument'] = self.document
        _append_child(self.curNode, node)

    def character_data_handler(self, data):
        """Character-data callback used when CDATA sections are not kept:
        merge into a trailing Text node or append a new one."""
        childNodes = self.curNode.childNodes
        if childNodes and childNodes[-1].nodeType == TEXT_NODE:
            node = childNodes[-1]
            d = node.__dict__
            d['data'] = d['nodeValue'] = node.data + data
            return
        node = minidom.Text()
        d = node.__dict__
        # NOTE(review): relies on a fresh Text node exposing an (empty)
        # .data value -- kept exactly as in the original.
        d['data'] = d['nodeValue'] = node.data + data
        d['ownerDocument'] = self.document
        _append_child(self.curNode, node)

    def entity_decl_handler(self, entityName, is_parameter_entity, value,
                            base, systemId, publicId, notationName):
        """Record a general entity declaration on the doctype unless the
        filter rejects it."""
        if is_parameter_entity:
            # we don't care about parameter entities for the DOM
            return
        if not self._options.entities:
            return
        node = self.document._create_entity(entityName, publicId,
                                            systemId, notationName)
        if value is not None:
            # internal entity
            # node *should* be readonly, but we'll cheat
            child = self.document.createTextNode(value)
            node.childNodes.append(child)
        self.document.doctype.entities._seq.append(node)
        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
            del self.document.doctype.entities._seq[-1]

    def notation_decl_handler(self, notationName, base, systemId, publicId):
        """Record a notation declaration on the doctype unless the filter
        rejects it."""
        node = self.document._create_notation(notationName, publicId, systemId)
        self.document.doctype.notations._seq.append(node)
        # Drop the node only on REJECT, matching entity_decl_handler();
        # the original compared against FILTER_ACCEPT, which discarded
        # exactly the notations the filter wanted to keep.
        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
            del self.document.doctype.notations._seq[-1]

    def comment_handler(self, data):
        """Append a Comment node unless the filter rejects it."""
        node = self.document.createComment(data)
        _append_child(self.curNode, node)
        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
            self.curNode.removeChild(node)

    def start_cdata_section_handler(self):
        """Mark that a CDATA section has started."""
        self._cdata = True
        self._cdata_continue = False

    def end_cdata_section_handler(self):
        """Mark that the current CDATA section has ended."""
        self._cdata = False
        self._cdata_continue = False

    def external_entity_ref_handler(self, context, base, systemId, publicId):
        """Ignore the external entity reference and return 1."""
        return 1

    def first_element_handler(self, name, attributes):
        """Start-element callback for the first element only; it swaps in
        start_element_handler() for the rest of the parse."""
        if self._filter is None and not self._elem_info:
            # Nothing for _finish_end_element() to do; use a cheap no-op.
            self._finish_end_element = id
        self.getParser().StartElementHandler = self.start_element_handler
        self.start_element_handler(name, attributes)

    def start_element_handler(self, name, attributes):
        """Create an Element for *name*, make it the current node and
        attach its attributes.

        *attributes* is a flat sequence of alternating names and values
        (the parser is configured with ordered_attributes).
        """
        node = self.document.createElement(name)
        _append_child(self.curNode, node)
        self.curNode = node

        if attributes:
            for i in range(0, len(attributes), 2):
                a = minidom.Attr(attributes[i], EMPTY_NAMESPACE,
                                 None, EMPTY_PREFIX)
                value = attributes[i+1]
                # Write through __dict__ directly (performance hack).
                d = a.childNodes[0].__dict__
                d['data'] = d['nodeValue'] = value
                d = a.__dict__
                d['value'] = d['nodeValue'] = value
                d['ownerDocument'] = self.document
                _set_attribute_node(node, a)

        if node is not self.document.documentElement:
            self._finish_start_element(node)

    def _finish_start_element(self, node):
        """Ask the filter whether *node* should be built; on REJECT or
        SKIP, detach it and install the matching helper."""
        if self._filter:
            # To be general, we'd have to call isSameNode(), but this
            # is sufficient for minidom:
            if node is self.document.documentElement:
                return
            filt = self._filter.startContainer(node)
            if filt == FILTER_REJECT:
                # ignore this node & all descendants
                Rejecter(self)
            elif filt == FILTER_SKIP:
                # ignore this node, but make its children become
                # children of the parent node
                Skipper(self)
            else:
                return
            self.curNode = node.parentNode
            node.parentNode.removeChild(node)
            node.unlink()

    # If this ever changes, Namespaces.end_element_handler() needs to
    # be changed to match.
    #
    def end_element_handler(self, name):
        """Pop the current element and run the end-of-element checks."""
        curNode = self.curNode
        self.curNode = curNode.parentNode
        self._finish_end_element(curNode)

    def _finish_end_element(self, curNode):
        """Apply declared-element whitespace handling, then let the
        filter reject the finished element."""
        info = self._elem_info.get(curNode.tagName)
        if info:
            self._handle_white_text_nodes(curNode, info)
        if self._filter:
            if curNode is self.document.documentElement:
                return
            if self._filter.acceptNode(curNode) == FILTER_REJECT:
                self.curNode.removeChild(curNode)
                curNode.unlink()
4143257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 4153257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel def _handle_white_text_nodes(self, node, info): 4163257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if (self._options.whitespace_in_element_content 4173257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel or not info.isElementContent()): 4183257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel return 4193257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 4203257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel # We have element type information and should remove ignorable 4213257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel # whitespace; identify for text nodes which contain only 4223257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel # whitespace. 4233257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel L = [] 4243257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel for child in node.childNodes: 4253257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if child.nodeType == TEXT_NODE and not child.data.strip(): 4263257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel L.append(child) 4273257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 4283257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel # Remove ignorable whitespace from the tree. 
4293257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel for child in L: 4303257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel node.removeChild(child) 4313257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 4323257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel def element_decl_handler(self, name, model): 4333257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel info = self._elem_info.get(name) 4343257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if info is None: 4353257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel self._elem_info[name] = ElementInfo(name, model) 4363257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel else: 4373257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel assert info._model is None 4383257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel info._model = model 4393257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 4403257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel def attlist_decl_handler(self, elem, name, type, default, required): 4413257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel info = self._elem_info.get(elem) 4423257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if info is None: 4433257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel info = ElementInfo(elem) 4443257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel self._elem_info[elem] = info 4453257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel info._attr_info.append( 4463257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel [None, name, None, None, default, 0, type, required]) 4473257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 4483257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel def xml_decl_handler(self, version, encoding, standalone): 4493257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel self.document.version = version 4503257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel self.document.encoding = encoding 4513257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel # This is still a little ugly, thanks to the pyexpat 
API. ;-( 4523257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if standalone >= 0: 4533257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if standalone: 4543257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel self.document.standalone = True 4553257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel else: 4563257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel self.document.standalone = False 4573257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 4583257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 4593257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel# Don't include FILTER_INTERRUPT, since that's checked separately 4603257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel# where allowed. 4613257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel_ALLOWED_FILTER_RETURNS = (FILTER_ACCEPT, FILTER_REJECT, FILTER_SKIP) 4623257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 4633257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielclass FilterVisibilityController(object): 4643257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel """Wrapper around a DOMBuilderFilter which implements the checks 4653257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel to make the whatToShow filter attribute work.""" 4663257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 4673257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel __slots__ = 'filter', 4683257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 4693257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel def __init__(self, filter): 4703257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel self.filter = filter 4713257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 4723257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel def startContainer(self, node): 4733257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel mask = self._nodetype_mask[node.nodeType] 4743257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if self.filter.whatToShow & mask: 4753257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel val = 
self.filter.startContainer(node) 4763257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if val == FILTER_INTERRUPT: 4773257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel raise ParseEscape 4783257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if val not in _ALLOWED_FILTER_RETURNS: 4793257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel raise ValueError, \ 4803257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel "startContainer() returned illegal value: " + repr(val) 4813257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel return val 4823257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel else: 4833257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel return FILTER_ACCEPT 4843257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 4853257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel def acceptNode(self, node): 4863257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel mask = self._nodetype_mask[node.nodeType] 4873257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if self.filter.whatToShow & mask: 4883257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel val = self.filter.acceptNode(node) 4893257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if val == FILTER_INTERRUPT: 4903257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel raise ParseEscape 4913257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if val == FILTER_SKIP: 4923257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel # move all child nodes to the parent, and remove this node 4933257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel parent = node.parentNode 4943257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel for child in node.childNodes[:]: 4953257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel parent.appendChild(child) 4963257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel # node is handled by the caller 4973257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel return FILTER_REJECT 4983257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if val not in 
_ALLOWED_FILTER_RETURNS: 4993257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel raise ValueError, \ 5003257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel "acceptNode() returned illegal value: " + repr(val) 5013257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel return val 5023257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel else: 5033257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel return FILTER_ACCEPT 5043257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 5053257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel _nodetype_mask = { 5063257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel Node.ELEMENT_NODE: NodeFilter.SHOW_ELEMENT, 5073257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel Node.ATTRIBUTE_NODE: NodeFilter.SHOW_ATTRIBUTE, 5083257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel Node.TEXT_NODE: NodeFilter.SHOW_TEXT, 5093257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel Node.CDATA_SECTION_NODE: NodeFilter.SHOW_CDATA_SECTION, 5103257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel Node.ENTITY_REFERENCE_NODE: NodeFilter.SHOW_ENTITY_REFERENCE, 5113257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel Node.ENTITY_NODE: NodeFilter.SHOW_ENTITY, 5123257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel Node.PROCESSING_INSTRUCTION_NODE: NodeFilter.SHOW_PROCESSING_INSTRUCTION, 5133257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel Node.COMMENT_NODE: NodeFilter.SHOW_COMMENT, 5143257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel Node.DOCUMENT_NODE: NodeFilter.SHOW_DOCUMENT, 5153257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel Node.DOCUMENT_TYPE_NODE: NodeFilter.SHOW_DOCUMENT_TYPE, 5163257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel Node.DOCUMENT_FRAGMENT_NODE: NodeFilter.SHOW_DOCUMENT_FRAGMENT, 5173257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel Node.NOTATION_NODE: NodeFilter.SHOW_NOTATION, 5183257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel } 5193257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 

class FilterCrutch(object):
    """Base class for objects that temporarily take over a builder's
    element handlers.

    On construction the parser's current StartElementHandler and
    EndElementHandler are saved and replaced by this object's
    start_element_handler() and end_element_handler(), which the
    subclasses provide.
    """

    __slots__ = '_builder', '_level', '_old_start', '_old_end'

    def __init__(self, builder):
        # Counts the elements opened since the takeover that have not
        # been closed yet.
        self._level = 0
        self._builder = builder
        parser = builder._parser
        self._old_start = parser.StartElementHandler
        self._old_end = parser.EndElementHandler
        parser.StartElementHandler = self.start_element_handler
        parser.EndElementHandler = self.end_element_handler

class Rejecter(FilterCrutch):
    """Crutch that discards everything up to the matching end tag."""

    __slots__ = ()

    def __init__(self, builder):
        FilterCrutch.__init__(self, builder)
        parser = builder._parser
        # Silence the content handlers while inside the rejected
        # element; they are put back by builder.install() later on.
        for name in ("ProcessingInstructionHandler",
                     "CommentHandler",
                     "CharacterDataHandler",
                     "StartCdataSectionHandler",
                     "EndCdataSectionHandler",
                     "ExternalEntityRefHandler",
                     ):
            setattr(parser, name, None)

    def start_element_handler(self, *args):
        """Note that a nested element was opened; build nothing."""
        self._level += 1

    def end_element_handler(self, *args):
        """Leave a nested element, or end the rejection at depth zero."""
        if self._level == 0:
            # restore the old handlers
            parser = self._builder._parser
            self._builder.install(parser)
            parser.StartElementHandler = self._old_start
            parser.EndElementHandler = self._old_end
        else:
            self._level -= 1

class Skipper(FilterCrutch):
    """Crutch that lets the saved handlers build the content of a
    skipped element while watching for that element's end tag."""

    __slots__ = ()

    def start_element_handler(self, *args):
        """Forward to the saved start handler and track the depth.

        The depth only grows when the saved handler actually changed
        the builder's current node.
        """
        node = self._builder.curNode
        self._old_start(*args)
        if self._builder.curNode is not node:
            self._level += 1

    def end_element_handler(self, *args):
        """Forward nested end tags; restore the handlers at depth zero."""
        if self._level == 0:
            # We're popping back out of the node we're skipping, so we
            # shouldn't need to do anything but reset the handlers.
            self._builder._parser.StartElementHandler = self._old_start
            self._builder._parser.EndElementHandler = self._old_end
            self._builder = None
        else:
            self._level -= 1
            self._old_end(*args)


# framework document used by the fragment builder.
# Takes a string for the doctype, subset string, and namespace attrs string.

_FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID = \
    "http://xml.python.org/entities/fragment-builder/internal"

_FRAGMENT_BUILDER_TEMPLATE = (
    '''\
<!DOCTYPE wrapper
  %%s [
  <!ENTITY fragment-builder-internal
    SYSTEM "%s">
%%s
]>
<wrapper %%s
>&fragment-builder-internal;</wrapper>'''
    % _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID)


class FragmentBuilder(ExpatBuilder):
    """Builder which constructs document fragments given XML source
    text and a context node.

    The context node is expected to provide information about the
    namespace declarations which are in scope at the start of the
    fragment.
    """

    def __init__(self, context, options=None):
        """Remember *context* and the document it belongs to.

        *context* may be the Document itself or any node with an
        ownerDocument.
        """
        self.context = context
        if context.nodeType == DOCUMENT_NODE:
            self.originalDocument = context
        else:
            self.originalDocument = context.ownerDocument
        ExpatBuilder.__init__(self, options)

    def reset(self):
        """Reset the builder and forget any fragment built so far."""
        ExpatBuilder.reset(self)
        self.fragment = None

    def parseFile(self, file):
        """Parse a document fragment from a file object, returning the
        fragment node."""
        return self.parseString(file.read())

    def parseString(self, string):
        """Parse a document fragment from a string, returning the
        fragment node.

        The text is not parsed directly: a wrapper document is built
        from _FRAGMENT_BUILDER_TEMPLATE, and *string* is parsed when
        the wrapper's internal entity is resolved by
        external_entity_ref_handler().  The builder is reset afterwards
        whether or not parsing succeeded.
        """
        self._source = string
        parser = self.getParser()
        doctype = self.originalDocument.doctype
        ident = ""
        if doctype:
            subset = doctype.internalSubset or self._getDeclarations()
            if doctype.publicId:
                ident = ('PUBLIC "%s" "%s"'
                         % (doctype.publicId, doctype.systemId))
            elif doctype.systemId:
                ident = 'SYSTEM "%s"' % doctype.systemId
        else:
            subset = ""
        nsattrs = self._getNSattrs() # get ns decls from node's ancestors
        document = _FRAGMENT_BUILDER_TEMPLATE % (ident, subset, nsattrs)
        try:
            parser.Parse(document, 1)
            # reset() clears self.fragment, so grab it first.
            fragment = self.fragment
        finally:
            self.reset()
        return fragment

    def _getDeclarations(self):
        """Re-create the internal subset from the DocumentType node.

        This is only needed if we don't already have the
        internalSubset as a string.  Returns the notation and entity
        declarations of the original document's doctype as one string,
        or "" when there is no doctype.
        """
        # Use originalDocument rather than context.ownerDocument: the
        # latter is None when the context node is the Document itself.
        doctype = self.originalDocument.doctype
        if not doctype:
            return ""
        decls = []
        notations = doctype.notations
        for i in range(notations.length):
            notation = notations.item(i)
            if notation.publicId:
                ext_id = ('PUBLIC "%s"\n             "%s"'
                          % (notation.publicId, notation.systemId))
            else:
                ext_id = 'SYSTEM "%s"' % notation.systemId
            decls.append("<!NOTATION %s %s>" % (notation.nodeName, ext_id))
        entities = doctype.entities
        for i in range(entities.length):
            entity = entities.item(i)
            if entity.publicId:
                value = ('PUBLIC "%s"\n             "%s"'
                         % (entity.publicId, entity.systemId))
            elif entity.systemId:
                value = 'SYSTEM "%s"' % entity.systemId
            else:
                value = '"%s"' % entity.firstChild.data
            decl = "<!ENTITY %s %s" % (entity.nodeName, value)
            if entity.notationName:
                # XML spells the notation of an unparsed entity NDATA.
                decl = "%s NDATA %s" % (decl, entity.notationName)
            decls.append(decl + ">")
        return "\n  ".join(decls)

    def _getNSattrs(self):
        """Return the namespace attributes for the wrapper element.

        This builder has none, so the result is always "".
        """
        return ""

    def external_entity_ref_handler(self, context, base, systemId, publicId):
        """Parse the fragment source when the wrapper's entity is hit.

        For the builder's own internal system identifier, the source
        passed to parseString() is parsed into a new DocumentFragment
        owned by the original document, and -1 is returned.  Any other
        reference is passed on to ExpatBuilder.
        """
        if systemId == _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID:
            # this entref is the one that we made to put the subtree
            # in; all of our given input is parsed in here.
            old_document = self.document
            old_cur_node = self.curNode
            parser = self._parser.ExternalEntityParserCreate(context)
            # put the real document back, parse into the fragment to return
            self.document = self.originalDocument
            self.fragment = self.document.createDocumentFragment()
            self.curNode = self.fragment
            try:
                parser.Parse(self._source, 1)
            finally:
                self.curNode = old_cur_node
                self.document = old_document
                self._source = None
            return -1
        else:
            return ExpatBuilder.external_entity_ref_handler(
                self, context, base, systemId, publicId)


class Namespaces:
    """Mix-in class for builders; adds support for namespaces."""

    def _initNamespaces(self):
        # list of (prefix, uri) ns declarations.  Namespace attrs are
        # constructed from this and added to the element's attrs.
        self._ns_ordered_prefixes = []

    def createParser(self):
        """Create a new namespace-handling parser."""
        parser = expat.ParserCreate(namespace_separator=" ")
        parser.namespace_prefixes = True
        return parser

    def install(self, parser):
        """Insert the namespace-handlers onto the parser."""
        ExpatBuilder.install(self, parser)
        if self._options.namespace_declarations:
            parser.StartNamespaceDeclHandler = (
                self.start_namespace_decl_handler)

    def start_namespace_decl_handler(self, prefix, uri):
        """Push this namespace declaration on our storage."""
        self._ns_ordered_prefixes.append((prefix, uri))

    def start_element_handler(self, name, attributes):
        if ' ' in name:
            uri, localname, prefix, qname = _parse_ns_name(self, name)
        else:
            uri = EMPTY_NAMESPACE
            qname = name
            localname = None
            prefix = EMPTY_PREFIX
        node = minidom.Element(qname, uri, prefix, localname)
        node.ownerDocument = self.document
        _append_child(self.curNode, node)
        self.curNode = node

        if self._ns_ordered_prefixes:
            for prefix, uri in self._ns_ordered_prefixes:
                if prefix:
                    a = minidom.Attr(_intern(self, 'xmlns:' + prefix),
                                     XMLNS_NAMESPACE, prefix, "xmlns")
                else:
                    a = minidom.Attr("xmlns", XMLNS_NAMESPACE,
                                     "xmlns", EMPTY_PREFIX)
                d = a.childNodes[0].__dict__
                d['data'] = d['nodeValue'] = uri
                d = a.__dict__
7673257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel d['value'] = d['nodeValue'] = uri 7683257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel d['ownerDocument'] = self.document 7693257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel _set_attribute_node(node, a) 7703257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel del self._ns_ordered_prefixes[:] 7713257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 7723257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if attributes: 7733257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel _attrs = node._attrs 7743257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel _attrsNS = node._attrsNS 7753257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel for i in range(0, len(attributes), 2): 7763257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel aname = attributes[i] 7773257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel value = attributes[i+1] 7783257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if ' ' in aname: 7793257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel uri, localname, prefix, qname = _parse_ns_name(self, aname) 7803257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel a = minidom.Attr(qname, uri, localname, prefix) 7813257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel _attrs[qname] = a 7823257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel _attrsNS[(uri, localname)] = a 7833257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel else: 7843257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel a = minidom.Attr(aname, EMPTY_NAMESPACE, 7853257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel aname, EMPTY_PREFIX) 7863257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel _attrs[aname] = a 7873257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel _attrsNS[(EMPTY_NAMESPACE, aname)] = a 7883257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel d = a.childNodes[0].__dict__ 7893257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel d['data'] = d['nodeValue'] = value 
7903257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel d = a.__dict__ 7913257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel d['ownerDocument'] = self.document 7923257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel d['value'] = d['nodeValue'] = value 7933257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel d['ownerElement'] = node 7943257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 7953257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if __debug__: 7963257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel # This only adds some asserts to the original 7973257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel # end_element_handler(), so we only define this when -O is not 7983257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel # used. If changing one, be sure to check the other to see if 7993257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel # it needs to be changed as well. 8003257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel # 8013257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel def end_element_handler(self, name): 8023257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel curNode = self.curNode 8033257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if ' ' in name: 8043257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel uri, localname, prefix, qname = _parse_ns_name(self, name) 8053257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel assert (curNode.namespaceURI == uri 8063257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel and curNode.localName == localname 8073257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel and curNode.prefix == prefix), \ 8083257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel "element stack messed up! 
(namespace)" 8093257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel else: 8103257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel assert curNode.nodeName == name, \ 8113257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel "element stack messed up - bad nodeName" 8123257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel assert curNode.namespaceURI == EMPTY_NAMESPACE, \ 8133257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel "element stack messed up - bad namespaceURI" 8143257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel self.curNode = curNode.parentNode 8153257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel self._finish_end_element(curNode) 8163257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 8173257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 8183257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielclass ExpatBuilderNS(Namespaces, ExpatBuilder): 8193257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel """Document builder that supports namespaces.""" 8203257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 8213257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel def reset(self): 8223257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel ExpatBuilder.reset(self) 8233257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel self._initNamespaces() 8243257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 8253257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 8263257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielclass FragmentBuilderNS(Namespaces, FragmentBuilder): 8273257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel """Fragment builder that supports namespaces.""" 8283257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 8293257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel def reset(self): 8303257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel FragmentBuilder.reset(self) 8313257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel self._initNamespaces() 8323257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 
8333257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel def _getNSattrs(self): 8343257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel """Return string of namespace attributes from this element and 8353257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel ancestors.""" 8363257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel # XXX This needs to be re-written to walk the ancestors of the 8373257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel # context to build up the namespace information from 8383257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel # declarations, elements, and attributes found in context. 8393257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel # Otherwise we have to store a bunch more data on the DOM 8403257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel # (though that *might* be more reliable -- not clear). 8413257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel attrs = "" 8423257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel context = self.context 8433257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel L = [] 8443257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel while context: 8453257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if hasattr(context, '_ns_prefix_uri'): 8463257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel for prefix, uri in context._ns_prefix_uri.items(): 8473257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel # add every new NS decl from context to L and attrs string 8483257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if prefix in L: 8493257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel continue 8503257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel L.append(prefix) 8513257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if prefix: 8523257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel declname = "xmlns:" + prefix 8533257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel else: 8543257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel declname = "xmlns" 
8553257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if attrs: 8563257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel attrs = "%s\n %s='%s'" % (attrs, declname, uri) 8573257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel else: 8583257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel attrs = " %s='%s'" % (declname, uri) 8593257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel context = context.parentNode 8603257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel return attrs 8613257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 8623257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 8633257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielclass ParseEscape(Exception): 8643257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel """Exception raised to short-circuit parsing in InternalSubsetExtractor.""" 8653257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel pass 8663257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 8673257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanielclass InternalSubsetExtractor(ExpatBuilder): 8683257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel """XML processor which can rip out the internal document type subset.""" 8693257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 8703257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel subset = None 8713257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 8723257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel def getSubset(self): 8733257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel """Return the internal subset as a string.""" 8743257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel return self.subset 8753257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 8763257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel def parseFile(self, file): 8773257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel try: 8783257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel ExpatBuilder.parseFile(self, file) 8793257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel except 
ParseEscape: 8803257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel pass 8813257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 8823257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel def parseString(self, string): 8833257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel try: 8843257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel ExpatBuilder.parseString(self, string) 8853257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel except ParseEscape: 8863257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel pass 8873257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 8883257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel def install(self, parser): 8893257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler 8903257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel parser.StartElementHandler = self.start_element_handler 8913257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 8923257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel def start_doctype_decl_handler(self, name, publicId, systemId, 8933257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel has_internal_subset): 8943257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if has_internal_subset: 8953257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel parser = self.getParser() 8963257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel self.subset = [] 8973257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel parser.DefaultHandler = self.subset.append 8983257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler 8993257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel else: 9003257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel raise ParseEscape() 9013257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 9023257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel def end_doctype_decl_handler(self): 9033257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel s = 
''.join(self.subset).replace('\r\n', '\n').replace('\r', '\n') 9043257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel self.subset = s 9053257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel raise ParseEscape() 9063257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 9073257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel def start_element_handler(self, name, attrs): 9083257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel raise ParseEscape() 9093257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 9103257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 9113257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanieldef parse(file, namespaces=True): 9123257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel """Parse a document, returning the resulting Document node. 9133257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 9143257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 'file' may be either a file name or an open file object. 9153257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel """ 9163257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if namespaces: 9173257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel builder = ExpatBuilderNS() 9183257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel else: 9193257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel builder = ExpatBuilder() 9203257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 9213257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if isinstance(file, StringTypes): 9223257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel fp = open(file, 'rb') 9233257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel try: 9243257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel result = builder.parseFile(fp) 9253257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel finally: 9263257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel fp.close() 9273257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel else: 9283257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel result = builder.parseFile(file) 
9293257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel return result 9303257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 9313257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 9323257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanieldef parseString(string, namespaces=True): 9333257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel """Parse a document from a string, returning the resulting 9343257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel Document node. 9353257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel """ 9363257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if namespaces: 9373257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel builder = ExpatBuilderNS() 9383257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel else: 9393257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel builder = ExpatBuilder() 9403257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel return builder.parseString(string) 9413257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 9423257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 9433257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanieldef parseFragment(file, context, namespaces=True): 9443257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel """Parse a fragment of a document, given the context from which it 9453257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel was originally extracted. context should be the parent of the 9463257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel node(s) which are in the fragment. 9473257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 9483257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 'file' may be either a file name or an open file object. 
9493257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel """ 9503257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if namespaces: 9513257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel builder = FragmentBuilderNS(context) 9523257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel else: 9533257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel builder = FragmentBuilder(context) 9543257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 9553257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if isinstance(file, StringTypes): 9563257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel fp = open(file, 'rb') 9573257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel try: 9583257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel result = builder.parseFile(fp) 9593257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel finally: 9603257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel fp.close() 9613257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel else: 9623257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel result = builder.parseFile(file) 9633257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel return result 9643257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 9653257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 9663257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanieldef parseFragmentString(string, context, namespaces=True): 9673257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel """Parse a fragment of a document from a string, given the context 9683257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel from which it was originally extracted. context should be the 9693257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel parent of the node(s) which are in the fragment. 
9703257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel """ 9713257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if namespaces: 9723257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel builder = FragmentBuilderNS(context) 9733257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel else: 9743257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel builder = FragmentBuilder(context) 9753257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel return builder.parseString(string) 9763257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 9773257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel 9783257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDanieldef makeBuilder(options): 9793257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel """Create a builder based on an Options object.""" 9803257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel if options.namespaces: 9813257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel return ExpatBuilderNS(options) 9823257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel else: 9833257aa99321d745773a6bd1bd4ce7f6fafe74411Daryl McDaniel return ExpatBuilder(options) 984