"""Facility to use the Expat parser to load a minidom instance
from a string or file.

This avoids all the overhead of SAX and pulldom to gain performance.
"""

# Warning!
#
# This module is tightly bound to the implementation details of the
# minidom DOM and can't be used with other DOM implementations.  This
# is due, in part, to a lack of appropriate methods in the DOM (there is
# no way to create Entity and Notation nodes via the DOM Level 2
# interface), and for performance.  The latter is the cause of some fairly
# cryptic code.
#
# Performance hacks:
#
#   -  .character_data_handler() has an extra case in which continuing
#      data is appended to an existing Text node; this can be a
#      speedup since pyexpat can break up character data into multiple
#      callbacks even though we set the buffer_text attribute on the
#      parser.  This also gives us the advantage that we don't need a
#      separate normalization pass.
#
#   -  Determining that a node exists is done using an identity comparison
#      with None rather than a truth test; this avoids searching for and
#      calling any methods on the node object if it exists.  (A rather
#      nice speedup is achieved this way as well!)

from xml.dom import xmlbuilder, minidom, Node
from xml.dom import EMPTY_NAMESPACE, EMPTY_PREFIX, XMLNS_NAMESPACE
from xml.parsers import expat
from xml.dom.minidom import _append_child, _set_attribute_node
from xml.dom.NodeFilter import NodeFilter

TEXT_NODE = Node.TEXT_NODE
CDATA_SECTION_NODE = Node.CDATA_SECTION_NODE
DOCUMENT_NODE = Node.DOCUMENT_NODE

FILTER_ACCEPT = xmlbuilder.DOMBuilderFilter.FILTER_ACCEPT
FILTER_REJECT = xmlbuilder.DOMBuilderFilter.FILTER_REJECT
FILTER_SKIP = xmlbuilder.DOMBuilderFilter.FILTER_SKIP
FILTER_INTERRUPT = xmlbuilder.DOMBuilderFilter.FILTER_INTERRUPT

theDOMImplementation = minidom.getDOMImplementation()

# Expat typename -> TypeInfo
_typeinfo_map = {
    "CDATA":    minidom.TypeInfo(None, "cdata"),
    "ENUM":     minidom.TypeInfo(None, "enumeration"),
    "ENTITY":   minidom.TypeInfo(None, "entity"),
    "ENTITIES": minidom.TypeInfo(None, "entities"),
    "ID":       minidom.TypeInfo(None, "id"),
    "IDREF":    minidom.TypeInfo(None, "idref"),
    "IDREFS":   minidom.TypeInfo(None, "idrefs"),
    "NMTOKEN":  minidom.TypeInfo(None, "nmtoken"),
    "NMTOKENS": minidom.TypeInfo(None, "nmtokens"),
    }


class ElementInfo(object):
    """Per-element-type record of a content model and attribute declarations.

    Each entry of ``_attr_info`` is a sequence whose item ``[1]`` is the
    attribute name and whose item ``[-2]`` is the Expat type name (or a
    parenthesised enumeration such as ``"(a|b)"``).  ``_model`` is either
    None or a sequence whose first item is one of the
    ``expat.model.XML_CTYPE_*`` constants.
    """

    __slots__ = '_attr_info', '_model', 'tagName'

    def __init__(self, tagName, model=None):
        self.tagName = tagName
        self._attr_info = []
        self._model = model

    def __getstate__(self):
        # __slots__ classes have no __dict__, so state is carried as a tuple.
        return self._attr_info, self._model, self.tagName

    def __setstate__(self, state):
        self._attr_info, self._model, self.tagName = state

    def getAttributeType(self, aname):
        """Return the TypeInfo for attribute *aname*.

        Returns ``minidom._no_type`` when no declaration for *aname* has
        been recorded.
        """
        for info in self._attr_info:
            if info[1] == aname:
                t = info[-2]
                # Enumerated types are reported as "(v1|v2|...)".
                if t[0] == "(":
                    return _typeinfo_map["ENUM"]
                return _typeinfo_map[t]
        return minidom._no_type

    def getAttributeTypeNS(self, namespaceURI, localName):
        """Namespace-aware lookup; always returns ``minidom._no_type``."""
        return minidom._no_type

    def isElementContent(self):
        """Return True if a model is recorded and it is neither ANY nor MIXED."""
        if self._model:
            content_type = self._model[0]
            return content_type not in (expat.model.XML_CTYPE_ANY,
                                        expat.model.XML_CTYPE_MIXED)
        else:
            return False

    def isEmpty(self):
        """Return True if a model is recorded and its type is EMPTY."""
        if self._model:
            return self._model[0] == expat.model.XML_CTYPE_EMPTY
        else:
            return False

    def isId(self, aname):
        """Return True if attribute *aname* is declared with type ``ID``."""
        for info in self._attr_info:
            if info[1] == aname:
                return info[-2] == "ID"
        return False

    def isIdNS(self, euri, ename, auri, aname):
        # not sure this is meaningful
        return self.isId((auri, aname))


def _intern(builder, s):
    """Return the builder's canonical (interned) copy of string *s*."""
    return builder._intern_setdefault(s, s)


def _parse_ns_name(builder, name):
    """Split a space-separated namespace name into its components.

    *name* is either ``"uri localname"`` or ``"uri localname prefix"``.
    Returns ``(uri, localname, prefix, qname)`` with every string passed
    through the builder's intern table; *prefix* is ``EMPTY_PREFIX`` when
    the two-part form is given.

    Raises ValueError if *name* has more than three space-separated parts.
    """
    assert ' ' in name
    parts = name.split(' ')
    intern = builder._intern_setdefault
    if len(parts) == 3:
        uri, localname, prefix = parts
        prefix = intern(prefix, prefix)
        qname = "%s:%s" % (prefix, localname)
        qname = intern(qname, qname)
        localname = intern(localname, localname)
    elif len(parts) == 2:
        uri, localname = parts
        prefix = EMPTY_PREFIX
        qname = localname = intern(localname, localname)
    else:
        raise ValueError("Unsupported syntax: spaces in URIs not supported: %r" % name)
    return intern(uri, uri), localname, prefix, qname


class ExpatBuilder:
    """Document builder that uses Expat to build a ParsedXML.DOM document
    instance."""

    def __init__(self, options=None):
        """Create a builder; *options* defaults to ``xmlbuilder.Options()``."""
        if options is None:
            options = xmlbuilder.Options()
        self._options = options
        if self._options.filter is not None:
            self._filter = FilterVisibilityController(self._options.filter)
        else:
            self._filter = None
            # This *really* doesn't do anything in this case, so
            # override it with something fast & minimal.
            self._finish_start_element = id
        self._parser = None
        self.reset()

    def createParser(self):
        """Create a new parser object."""
        return expat.ParserCreate()

    def getParser(self):
        """Return the parser object, creating a new one if needed."""
        if not self._parser:
            self._parser = self.createParser()
            self._intern_setdefault = self._parser.intern.setdefault
            self._parser.buffer_text = True
            self._parser.ordered_attributes = True
            self._parser.specified_attributes = True
            self.install(self._parser)
        return self._parser

    def reset(self):
        """Free all data structures used during DOM construction."""
        self.document = theDOMImplementation.createDocument(
            EMPTY_NAMESPACE, None, None)
        self.curNode = self.document
        self._elem_info = self.document._elem_info
        self._cdata = False

    def install(self, parser):
        """Install the callbacks needed to build the DOM into the parser."""
        # This creates circular references!
        parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler
        parser.StartElementHandler = self.first_element_handler
        parser.EndElementHandler = self.end_element_handler
        parser.ProcessingInstructionHandler = self.pi_handler
        if self._options.entities:
            parser.EntityDeclHandler = self.entity_decl_handler
        parser.NotationDeclHandler = self.notation_decl_handler
        if self._options.comments:
            parser.CommentHandler = self.comment_handler
        if self._options.cdata_sections:
            parser.StartCdataSectionHandler = self.start_cdata_section_handler
            parser.EndCdataSectionHandler = self.end_cdata_section_handler
            parser.CharacterDataHandler = self.character_data_handler_cdata
        else:
            parser.CharacterDataHandler = self.character_data_handler
        parser.ExternalEntityRefHandler = self.external_entity_ref_handler
        parser.XmlDeclHandler = self.xml_decl_handler
        parser.ElementDeclHandler = self.element_decl_handler
        parser.AttlistDeclHandler = self.attlist_decl_handler

    def parseFile(self, file):
        """Parse a document from a file object, returning the document
        node."""
        parser = self.getParser()
        first_buffer = True
        try:
            while True:
                buffer = file.read(16*1024)
                if not buffer:
                    break
                parser.Parse(buffer, False)
                # Only the first chunk is scanned for an internal subset.
                if first_buffer and self.document.documentElement:
                    self._setup_subset(buffer)
                first_buffer = False
            parser.Parse("", True)
        except ParseEscape:
            pass
        doc = self.document
        self.reset()
        self._parser = None
        return doc

    def parseString(self, string):
        """Parse a document from a string, returning the document node."""
        parser = self.getParser()
        try:
            parser.Parse(string, True)
            self._setup_subset(string)
        except ParseEscape:
            pass
        doc = self.document
        self.reset()
        self._parser = None
        return doc

    def _setup_subset(self, buffer):
        """Load the internal subset if there might be one."""
        if self.document.doctype:
            extractor = InternalSubsetExtractor()
            extractor.parseString(buffer)
            subset = extractor.getSubset()
            self.document.doctype.internalSubset = subset

    def start_doctype_decl_handler(self, doctypeName, systemId, publicId,
                                   has_internal_subset):
        """Create the DocumentType node and attach it to the document.

        If the filter rejects the doctype it is detached again and the
        entity/notation declaration handlers are disabled.  While an
        internal subset is being read, comment and PI handlers are
        switched off until end_doctype_decl_handler() restores them.
        """
        doctype = self.document.implementation.createDocumentType(
            doctypeName, publicId, systemId)
        doctype.ownerDocument = self.document
        _append_child(self.document, doctype)
        self.document.doctype = doctype
        if self._filter and self._filter.acceptNode(doctype) == FILTER_REJECT:
            self.document.doctype = None
            del self.document.childNodes[-1]
            doctype = None
            self._parser.EntityDeclHandler = None
            self._parser.NotationDeclHandler = None
        if has_internal_subset:
            if doctype is not None:
                # Swap in mutable lists so the declaration handlers can append.
                doctype.entities._seq = []
                doctype.notations._seq = []
            self._parser.CommentHandler = None
            self._parser.ProcessingInstructionHandler = None
            self._parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler

    def end_doctype_decl_handler(self):
        """Re-enable the comment and PI handlers after the doctype ends."""
        if self._options.comments:
            self._parser.CommentHandler = self.comment_handler
        self._parser.ProcessingInstructionHandler = self.pi_handler
        if not (self._elem_info or self._filter):
            # Nothing for _finish_end_element to do; use a cheap no-op.
            self._finish_end_element = id

    def pi_handler(self, target, data):
        """Append a ProcessingInstruction node unless the filter rejects it."""
        node = self.document.createProcessingInstruction(target, data)
        _append_child(self.curNode, node)
        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
            self.curNode.removeChild(node)

    def character_data_handler_cdata(self, data):
        """Character data callback used when CDATA sections are preserved."""
        childNodes = self.curNode.childNodes
        if self._cdata:
            if (self._cdata_continue
                    and childNodes[-1].nodeType == CDATA_SECTION_NODE):
                childNodes[-1].appendData(data)
                return
            node = self.document.createCDATASection(data)
            self._cdata_continue = True
        elif childNodes and childNodes[-1].nodeType == TEXT_NODE:
            # Merge with the preceding Text node instead of adding a sibling.
            node = childNodes[-1]
            value = node.data + data
            node.data = value
            return
        else:
            node = minidom.Text()
            node.data = data
            node.ownerDocument = self.document
        _append_child(self.curNode, node)

    def character_data_handler(self, data):
        """Character data callback used when CDATA sections are not kept."""
        childNodes = self.curNode.childNodes
        if childNodes and childNodes[-1].nodeType == TEXT_NODE:
            # Merge with the preceding Text node instead of adding a sibling.
            node = childNodes[-1]
            node.data = node.data + data
            return
        node = minidom.Text()
        node.data = data
        node.ownerDocument = self.document
        _append_child(self.curNode, node)

    def entity_decl_handler(self, entityName, is_parameter_entity, value,
                            base, systemId, publicId, notationName):
        """Record a general entity declaration on the doctype."""
        if is_parameter_entity:
            # we don't care about parameter entities for the DOM
            return
        if not self._options.entities:
            return
        node = self.document._create_entity(entityName, publicId,
                                            systemId, notationName)
        if value is not None:
            # internal entity
            # node *should* be readonly, but we'll cheat
            child = self.document.createTextNode(value)
            node.childNodes.append(child)
        self.document.doctype.entities._seq.append(node)
        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
            del self.document.doctype.entities._seq[-1]

    def notation_decl_handler(self, notationName, base, systemId, publicId):
        """Record a notation declaration on the doctype.

        The notation is removed again only when the filter rejects it,
        matching the behaviour of the entity, comment and PI handlers.
        """
        node = self.document._create_notation(notationName, publicId, systemId)
        self.document.doctype.notations._seq.append(node)
        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
            del self.document.doctype.notations._seq[-1]

    def comment_handler(self, data):
        """Append a Comment node unless the filter rejects it."""
        node = self.document.createComment(data)
        _append_child(self.curNode, node)
        if self._filter and self._filter.acceptNode(node) == FILTER_REJECT:
            self.curNode.removeChild(node)

    def start_cdata_section_handler(self):
        self._cdata = True
        self._cdata_continue = False

    def end_cdata_section_handler(self):
        self._cdata = False
        self._cdata_continue = False

    def external_entity_ref_handler(self, context, base, systemId, publicId):
        # Returns 1 without loading anything.
        return 1

    def first_element_handler(self, name, attributes):
        """Handle the first start tag, then hand over to start_element_handler."""
        if self._filter is None and not self._elem_info:
            # Nothing for _finish_end_element to do; use a cheap no-op.
            self._finish_end_element = id
        self.getParser().StartElementHandler = self.start_element_handler
        self.start_element_handler(name, attributes)

    def start_element_handler(self, name, attributes):
        """Create an Element for *name* and make it the current node.

        *attributes* is a flat ``[name, value, name, value, ...]`` sequence
        (the parser is configured with ``ordered_attributes``).
        """
        node = self.document.createElement(name)
        _append_child(self.curNode, node)
        self.curNode = node

        if attributes:
            for i in range(0, len(attributes), 2):
                a = minidom.Attr(attributes[i], EMPTY_NAMESPACE,
                                 None, EMPTY_PREFIX)
                value = attributes[i+1]
                a.value = value
                a.ownerDocument = self.document
                _set_attribute_node(node, a)

        if node is not self.document.documentElement:
            self._finish_start_element(node)

    def _finish_start_element(self, node):
        """Consult the filter about a freshly started (non-root) element."""
        if self._filter:
            # To be general, we'd have to call isSameNode(), but this
            # is sufficient for minidom:
            if node is self.document.documentElement:
                return
            filt = self._filter.startContainer(node)
            if filt == FILTER_REJECT:
                # ignore this node & all descendants
                Rejecter(self)
            elif filt == FILTER_SKIP:
                # ignore this node, but make its children become
                # children of the parent node
                Skipper(self)
            else:
                return
            self.curNode = node.parentNode
            node.parentNode.removeChild(node)
            node.unlink()

    # If this ever changes, Namespaces.end_element_handler() needs to
    # be changed to match.
    #
    def end_element_handler(self, name):
        curNode = self.curNode
        self.curNode = curNode.parentNode
        self._finish_end_element(curNode)

    def _finish_end_element(self, curNode):
        """Post-process a closed element: whitespace handling, then filter."""
        info = self._elem_info.get(curNode.tagName)
        if info:
            self._handle_white_text_nodes(curNode, info)
        if self._filter:
            if curNode is self.document.documentElement:
                return
            if self._filter.acceptNode(curNode) == FILTER_REJECT:
                self.curNode.removeChild(curNode)
                curNode.unlink()

    def _handle_white_text_nodes(self, node, info):
        if (self._options.whitespace_in_element_content
            or not info.isElementContent()):
            return

        # We have element type information and should remove ignorable
        # whitespace; identify for text nodes which contain only
        # whitespace.
Löwis L = [] 417fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis for child in node.childNodes: 418fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis if child.nodeType == TEXT_NODE and not child.data.strip(): 419fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis L.append(child) 420fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis 421fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis # Remove ignorable whitespace from the tree. 422fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis for child in L: 423fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis node.removeChild(child) 424fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis 425fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis def element_decl_handler(self, name, model): 426fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis info = self._elem_info.get(name) 427fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis if info is None: 428fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis self._elem_info[name] = ElementInfo(name, model) 429fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis else: 430fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis assert info._model is None 431fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis info._model = model 432fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis 433fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis def attlist_decl_handler(self, elem, name, type, default, required): 434fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis info = self._elem_info.get(elem) 435fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis if info is None: 436fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis info = ElementInfo(elem) 437fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis self._elem_info[elem] = info 438fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis info._attr_info.append( 439fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. 
Löwis [None, name, None, None, default, 0, type, required]) 440fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis 441fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis def xml_decl_handler(self, version, encoding, standalone): 442fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis self.document.version = version 443fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis self.document.encoding = encoding 444fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis # This is still a little ugly, thanks to the pyexpat API. ;-( 445fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis if standalone >= 0: 446fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis if standalone: 447fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis self.document.standalone = True 448fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis else: 449fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis self.document.standalone = False 450fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis 451fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis 452fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis# Don't include FILTER_INTERRUPT, since that's checked separately 453fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis# where allowed. 454fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis_ALLOWED_FILTER_RETURNS = (FILTER_ACCEPT, FILTER_REJECT, FILTER_SKIP) 455fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis 45649fd7fa4431da299196d74087df4a04f99f9c46fThomas Woutersclass FilterVisibilityController(object): 457fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis """Wrapper around a DOMBuilderFilter which implements the checks 458fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis to make the whatToShow filter attribute work.""" 459fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis 460fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis __slots__ = 'filter', 461fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. 
Löwis 462fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis def __init__(self, filter): 463fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis self.filter = filter 464fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis 465fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis def startContainer(self, node): 466fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis mask = self._nodetype_mask[node.nodeType] 467fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis if self.filter.whatToShow & mask: 468fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis val = self.filter.startContainer(node) 469fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis if val == FILTER_INTERRUPT: 470fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis raise ParseEscape 471fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis if val not in _ALLOWED_FILTER_RETURNS: 47270e79803fcc93e19808faa240a5f5e4854d0b077Collin Winter raise ValueError( 47370e79803fcc93e19808faa240a5f5e4854d0b077Collin Winter "startContainer() returned illegal value: " + repr(val)) 474fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis return val 475fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis else: 476fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis return FILTER_ACCEPT 477fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis 478fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis def acceptNode(self, node): 479fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis mask = self._nodetype_mask[node.nodeType] 480fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis if self.filter.whatToShow & mask: 481fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis val = self.filter.acceptNode(node) 482fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis if val == FILTER_INTERRUPT: 483fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis raise ParseEscape 484fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. 
Löwis if val == FILTER_SKIP: 485fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis # move all child nodes to the parent, and remove this node 486fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis parent = node.parentNode 487fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis for child in node.childNodes[:]: 488fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis parent.appendChild(child) 489fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis # node is handled by the caller 490fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis return FILTER_REJECT 491fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis if val not in _ALLOWED_FILTER_RETURNS: 49270e79803fcc93e19808faa240a5f5e4854d0b077Collin Winter raise ValueError( 49370e79803fcc93e19808faa240a5f5e4854d0b077Collin Winter "acceptNode() returned illegal value: " + repr(val)) 494fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis return val 495fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis else: 496fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis return FILTER_ACCEPT 497fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis 498fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis _nodetype_mask = { 499fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis Node.ELEMENT_NODE: NodeFilter.SHOW_ELEMENT, 500fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis Node.ATTRIBUTE_NODE: NodeFilter.SHOW_ATTRIBUTE, 501fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis Node.TEXT_NODE: NodeFilter.SHOW_TEXT, 502fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis Node.CDATA_SECTION_NODE: NodeFilter.SHOW_CDATA_SECTION, 503fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis Node.ENTITY_REFERENCE_NODE: NodeFilter.SHOW_ENTITY_REFERENCE, 504fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis Node.ENTITY_NODE: NodeFilter.SHOW_ENTITY, 505fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. 
Löwis Node.PROCESSING_INSTRUCTION_NODE: NodeFilter.SHOW_PROCESSING_INSTRUCTION, 506fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis Node.COMMENT_NODE: NodeFilter.SHOW_COMMENT, 507fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis Node.DOCUMENT_NODE: NodeFilter.SHOW_DOCUMENT, 508fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis Node.DOCUMENT_TYPE_NODE: NodeFilter.SHOW_DOCUMENT_TYPE, 509fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis Node.DOCUMENT_FRAGMENT_NODE: NodeFilter.SHOW_DOCUMENT_FRAGMENT, 510fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis Node.NOTATION_NODE: NodeFilter.SHOW_NOTATION, 511fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis } 512fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis 513fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis 51449fd7fa4431da299196d74087df4a04f99f9c46fThomas Woutersclass FilterCrutch(object): 515fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis __slots__ = '_builder', '_level', '_old_start', '_old_end' 516fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis 517fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis def __init__(self, builder): 518fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis self._level = 0 519fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis self._builder = builder 520fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis parser = builder._parser 521fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis self._old_start = parser.StartElementHandler 522fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis self._old_end = parser.EndElementHandler 523fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis parser.StartElementHandler = self.start_element_handler 524fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis parser.EndElementHandler = self.end_element_handler 525fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis 526fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. 
class Rejecter(FilterCrutch):
    """Filter crutch that discards an element and all of its descendants.

    While active, the content handlers listed in __init__ are cleared on
    the parser and nested start/end tags are only counted, so nothing is
    built for the rejected subtree.
    """

    __slots__ = ()

    def __init__(self, builder):
        """Take over the element handlers and clear the content handlers."""
        FilterCrutch.__init__(self, builder)
        parser = builder._parser
        for name in ("ProcessingInstructionHandler",
                     "CommentHandler",
                     "CharacterDataHandler",
                     "StartCdataSectionHandler",
                     "EndCdataSectionHandler",
                     "ExternalEntityRefHandler",
                     ):
            setattr(parser, name, None)

    def start_element_handler(self, *args):
        """Count one more level of nesting inside the rejected element."""
        self._level += 1

    def end_element_handler(self, *args):
        """Leave one nesting level; at the outermost level restore handlers."""
        if self._level == 0:
            # restore the old handlers
            parser = self._builder._parser
            # install() re-registers the builder's handlers; the saved
            # start/end handlers are put back afterwards so they win.
            self._builder.install(parser)
            parser.StartElementHandler = self._old_start
            parser.EndElementHandler = self._old_end
        else:
            self._level -= 1


class Skipper(FilterCrutch):
    """Filter crutch that drops one element but keeps building its children.

    Start and end events are forwarded to the handlers that were saved when
    the crutch was created; the nesting level is tracked so that the end tag
    of the skipped element itself can be recognised.
    """

    __slots__ = ()

    def start_element_handler(self, *args):
        """Forward the event and count the element if it became current."""
        node = self._builder.curNode
        self._old_start(*args)
        # Only count the element when the builder's current node changed.
        if self._builder.curNode is not node:
            self._level += 1

    def end_element_handler(self, *args):
        """Forward nested end tags; on the skipped element's end tag, detach."""
        if self._level == 0:
            # We're popping back out of the node we're skipping, so we
            # shouldn't need to do anything but reset the handlers.
            self._builder._parser.StartElementHandler = self._old_start
            self._builder._parser.EndElementHandler = self._old_end
            self._builder = None
        else:
            self._level -= 1
            self._old_end(*args)


# Framework document used by the fragment builder.
# Takes a string for the doctype, subset string, and namespace attrs string.

_FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID = \
    "http://xml.python.org/entities/fragment-builder/internal"

# The system id is substituted here; the three "%%s" placeholders survive
# as "%s" and are filled in by FragmentBuilder.parseString().
_FRAGMENT_BUILDER_TEMPLATE = (
    '''\
<!DOCTYPE wrapper
 %%s [
 <!ENTITY fragment-builder-internal
 SYSTEM "%s">
%%s
]>
<wrapper %%s
>&fragment-builder-internal;</wrapper>'''
    % _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID)


class FragmentBuilder(ExpatBuilder):
    """Builder which constructs document fragments given XML source
    text and a context node.

    The context node is expected to provide information about the
    namespace declarations which are in scope at the start of the
    fragment.
    """

    def __init__(self, context, options=None):
        """Remember the context node and the document it belongs to.

        If context is itself a document node it is used as the original
        document; otherwise context.ownerDocument is.
        """
        if context.nodeType == DOCUMENT_NODE:
            self.originalDocument = context
        else:
            self.originalDocument = context.ownerDocument
        self.context = context
        ExpatBuilder.__init__(self, options)

    def reset(self):
        """Reset the builder and forget any fragment built so far."""
        ExpatBuilder.reset(self)
        self.fragment = None

    def parseFile(self, file):
        """Parse a document fragment from a file object, returning the
        fragment node."""
        return self.parseString(file.read())

    def parseString(self, string):
        """Parse a document fragment from a string, returning the
        fragment node.

        The string is not parsed directly: a wrapper document is built from
        _FRAGMENT_BUILDER_TEMPLATE using the original document's doctype
        identifiers, its internal subset (or a re-created one) and the
        namespace attributes, and that wrapper is fed to the parser.  The
        builder is reset afterwards whether or not parsing succeeded; any
        exception raised by the parser propagates to the caller.
        """
        self._source = string
        parser = self.getParser()
        doctype = self.originalDocument.doctype
        ident = ""
        if doctype:
            subset = doctype.internalSubset or self._getDeclarations()
            if doctype.publicId:
                ident = ('PUBLIC "%s" "%s"'
                         % (doctype.publicId, doctype.systemId))
            elif doctype.systemId:
                ident = 'SYSTEM "%s"' % doctype.systemId
        else:
            subset = ""
        nsattrs = self._getNSattrs()  # get ns decls from node's ancestors
        document = _FRAGMENT_BUILDER_TEMPLATE % (ident, subset, nsattrs)
        try:
            parser.Parse(document, True)
            # Evaluated before the finally clause clears self.fragment.
            return self.fragment
        finally:
            self.reset()

    def _getDeclarations(self):
        """Re-create the internal subset from the DocumentType node.

        This is only needed if we don't already have the
        internalSubset as a string.
        """
        # Use originalDocument rather than context.ownerDocument: when the
        # context node is the document itself, ownerDocument is None.
        doctype = self.originalDocument.doctype
        if not doctype:
            return ""
        declarations = []
        for i in range(doctype.notations.length):
            notation = doctype.notations.item(i)
            decl = "<!NOTATION %s" % notation.nodeName
            if notation.publicId:
                decl = '%s PUBLIC "%s"\n "%s">' \
                       % (decl, notation.publicId, notation.systemId)
            else:
                decl = '%s SYSTEM "%s">' % (decl, notation.systemId)
            declarations.append(decl)
        for i in range(doctype.entities.length):
            entity = doctype.entities.item(i)
            decl = "<!ENTITY %s" % entity.nodeName
            if entity.publicId:
                decl = '%s PUBLIC "%s"\n "%s"' \
                       % (decl, entity.publicId, entity.systemId)
            elif entity.systemId:
                decl = '%s SYSTEM "%s"' % (decl, entity.systemId)
            else:
                # Neither identifier is set: emit the entity's literal value.
                decl = '%s "%s"' % (decl, entity.firstChild.data)
            if entity.notationName:
                decl = "%s NOTATION %s" % (decl, entity.notationName)
            declarations.append(decl + ">")
        return "\n ".join(declarations)

    def _getNSattrs(self):
        """Return the attribute text inserted into the wrapper start tag.

        This builder contributes none.
        """
        return ""
Löwis 688fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis def external_entity_ref_handler(self, context, base, systemId, publicId): 689fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis if systemId == _FRAGMENT_BUILDER_INTERNAL_SYSTEM_ID: 690fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis # this entref is the one that we made to put the subtree 691fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis # in; all of our given input is parsed in here. 692fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis old_document = self.document 693fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis old_cur_node = self.curNode 694fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis parser = self._parser.ExternalEntityParserCreate(context) 695fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis # put the real document back, parse into the fragment to return 696fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis self.document = self.originalDocument 697fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis self.fragment = self.document.createDocumentFragment() 698fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis self.curNode = self.fragment 699fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis try: 700fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis parser.Parse(self._source, 1) 701fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis finally: 702fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis self.curNode = old_cur_node 703fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis self.document = old_document 704fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis self._source = None 705fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis return -1 706fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis else: 707fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. Löwis return ExpatBuilder.external_entity_ref_handler( 708fc5fec77350c1231f55ac8facb1ec3ce0d635a4dMartin v. 
class Namespaces:
    """Mix-in class for builders; adds support for namespaces.

    The mix-in is combined with ExpatBuilder or FragmentBuilder (see
    ExpatBuilderNS and FragmentBuilderNS).  It creates a parser that
    reports namespace information and builds namespace-aware Element
    and Attr nodes from the names expat reports.
    """

    def _initNamespaces(self):
        """Reset the queue of pending namespace declarations."""
        # list of (prefix, uri) ns declarations.  Namespace attrs are
        # constructed from this and added to the element's attrs.
        self._ns_ordered_prefixes = []

    def createParser(self):
        """Create a new namespace-handling parser."""
        # With a separator configured, expat reports names as the URI,
        # local name and (because namespace_prefixes is set) the prefix,
        # joined by single spaces; _parse_ns_name() takes them apart.
        parser = expat.ParserCreate(namespace_separator=" ")
        parser.namespace_prefixes = True
        return parser

    def install(self, parser):
        """Insert the namespace-handlers onto the parser."""
        ExpatBuilder.install(self, parser)
        # Declarations are only collected (and later turned into xmlns
        # attributes) when the options ask for them.
        if self._options.namespace_declarations:
            parser.StartNamespaceDeclHandler = (
                self.start_namespace_decl_handler)

    def start_namespace_decl_handler(self, prefix, uri):
        """Push this namespace declaration on our storage."""
        self._ns_ordered_prefixes.append((prefix, uri))

    def start_element_handler(self, name, attributes):
        """Create an Element for *name* and make it the current node.

        *name* is either a plain tag name or a space-separated
        namespace name as produced by the parser from createParser().
        *attributes* is a flat sequence that alternates attribute name
        and attribute value.  Pending namespace declarations are
        attached to the new element as xmlns attributes and the queue
        is emptied.
        """
        if ' ' in name:
            uri, localname, prefix, qname = _parse_ns_name(self, name)
        else:
            uri = EMPTY_NAMESPACE
            qname = name
            localname = None
            prefix = EMPTY_PREFIX
        node = minidom.Element(qname, uri, prefix, localname)
        node.ownerDocument = self.document
        _append_child(self.curNode, node)
        self.curNode = node

        if self._ns_ordered_prefixes:
            for decl_prefix, decl_uri in self._ns_ordered_prefixes:
                if decl_prefix:
                    a = minidom.Attr(_intern(self, 'xmlns:' + decl_prefix),
                                     XMLNS_NAMESPACE, decl_prefix, "xmlns")
                else:
                    a = minidom.Attr("xmlns", XMLNS_NAMESPACE,
                                     "xmlns", EMPTY_PREFIX)
                a.value = decl_uri
                a.ownerDocument = self.document
                _set_attribute_node(node, a)
            # Clear in place: the declarations belong to this element only.
            del self._ns_ordered_prefixes[:]

        if attributes:
            node._ensure_attributes()
            _attrs = node._attrs
            _attrsNS = node._attrsNS
            # Walk the (name, value) pairs of the flat attribute list.
            for i in range(0, len(attributes), 2):
                aname = attributes[i]
                value = attributes[i+1]
                if ' ' in aname:
                    uri, localname, prefix, qname = _parse_ns_name(self, aname)
                    a = minidom.Attr(qname, uri, localname, prefix)
                    _attrs[qname] = a
                    _attrsNS[(uri, localname)] = a
                else:
                    a = minidom.Attr(aname, EMPTY_NAMESPACE,
                                     aname, EMPTY_PREFIX)
                    _attrs[aname] = a
                    _attrsNS[(EMPTY_NAMESPACE, aname)] = a
                a.ownerDocument = self.document
                a.value = value
                a.ownerElement = node

    if __debug__:
        # This only adds some asserts to the original
        # end_element_handler(), so we only define this when -O is not
        # used.  If changing one, be sure to check the other to see if
        # it needs to be changed as well.
        #
        def end_element_handler(self, name):
            """Close the current element, checking that it matches *name*."""
            curNode = self.curNode
            if ' ' in name:
                uri, localname, prefix, qname = _parse_ns_name(self, name)
                assert (curNode.namespaceURI == uri
                        and curNode.localName == localname
                        and curNode.prefix == prefix), \
                        "element stack messed up! (namespace)"
            else:
                assert curNode.nodeName == name, \
                       "element stack messed up - bad nodeName"
                assert curNode.namespaceURI == EMPTY_NAMESPACE, \
                       "element stack messed up - bad namespaceURI"
            self.curNode = curNode.parentNode
            self._finish_end_element(curNode)


class ExpatBuilderNS(Namespaces, ExpatBuilder):
    """Document builder that supports namespaces."""

    def reset(self):
        """Reset the builder state, including the namespace queue."""
        ExpatBuilder.reset(self)
        self._initNamespaces()


class FragmentBuilderNS(Namespaces, FragmentBuilder):
    """Fragment builder that supports namespaces."""

    def reset(self):
        """Reset the builder state, including the namespace queue."""
        FragmentBuilder.reset(self)
        self._initNamespaces()

    def _getNSattrs(self):
        """Return string of namespace attributes from this element and
        ancestors.

        The context node and its parentNode chain are walked from the
        inside out; a prefix declared on a nearer node hides the same
        prefix on a farther one.  Returns "" when no node in the chain
        carries a '_ns_prefix_uri' mapping.
        """
        # XXX This needs to be re-written to walk the ancestors of the
        # context to build up the namespace information from
        # declarations, elements, and attributes found in context.
        # Otherwise we have to store a bunch more data on the DOM
        # (though that *might* be more reliable -- not clear).
        #
        # NOTE(review): the URI is interpolated without escaping; a URI
        # containing a quote or markup character would yield malformed
        # attribute text -- confirm whether that can occur here.
        attrs = ""
        context = self.context
        seen = set()  # prefixes already emitted by a nearer node
        while context:
            if hasattr(context, '_ns_prefix_uri'):
                for prefix, uri in context._ns_prefix_uri.items():
                    # add every new NS decl from context to the attrs string
                    if prefix in seen:
                        continue
                    seen.add(prefix)
                    if prefix:
                        declname = "xmlns:" + prefix
                    else:
                        declname = "xmlns"
                    if attrs:
                        attrs = "%s\n %s='%s'" % (attrs, declname, uri)
                    else:
                        attrs = " %s='%s'" % (declname, uri)
            context = context.parentNode
        return attrs


class ParseEscape(Exception):
    """Exception raised to short-circuit parsing in InternalSubsetExtractor."""


class InternalSubsetExtractor(ExpatBuilder):
    """XML processor which can rip out the internal document type subset."""

    # None until a doctype with an internal subset has been seen; a list
    # of text chunks while the subset is being collected; finally a str.
    subset = None

    def getSubset(self):
        """Return the internal subset as a string."""
        return self.subset

    def parseFile(self, file):
        """Parse *file*, stopping quietly once a handler raises ParseEscape."""
        try:
            ExpatBuilder.parseFile(self, file)
        except ParseEscape:
            pass

    def parseString(self, string):
        """Parse *string*, stopping quietly once a handler raises ParseEscape."""
        try:
            ExpatBuilder.parseString(self, string)
        except ParseEscape:
            pass

    def install(self, parser):
        """Install only the two handlers needed to locate the subset."""
        parser.StartDoctypeDeclHandler = self.start_doctype_decl_handler
        parser.StartElementHandler = self.start_element_handler

    def start_doctype_decl_handler(self, name, publicId, systemId,
                                   has_internal_subset):
        """Start collecting the subset text, or stop if there is none."""
        if has_internal_subset:
            parser = self.getParser()
            self.subset = []
            # Everything not claimed by another handler is appended to
            # the list until the doctype declaration ends.
            parser.DefaultHandler = self.subset.append
            parser.EndDoctypeDeclHandler = self.end_doctype_decl_handler
        else:
            raise ParseEscape()

    def end_doctype_decl_handler(self):
        """Join the collected text, normalize newlines, and stop parsing."""
        s = ''.join(self.subset).replace('\r\n', '\n').replace('\r', '\n')
        self.subset = s
        raise ParseEscape()

    def start_element_handler(self, name, attrs):
        """Stop parsing: the first element marks the end of the prolog."""
        raise ParseEscape()


def parse(file, namespaces=True):
    """Parse a document, returning the resulting Document node.

    'file' may be either a file name (a str or an os.PathLike object)
    or an open file object.  A file name is opened in binary mode and
    closed again before returning.  If 'namespaces' is true, a
    namespace-aware builder is used.
    """
    import os  # local import; only needed for the PathLike check

    if namespaces:
        builder = ExpatBuilderNS()
    else:
        builder = ExpatBuilder()

    if isinstance(file, (str, os.PathLike)):
        with open(file, 'rb') as fp:
            result = builder.parseFile(fp)
    else:
        result = builder.parseFile(file)
    return result


def parseString(string, namespaces=True):
    """Parse a document from a string, returning the resulting
    Document node.

    If 'namespaces' is true, a namespace-aware builder is used.
    """
    if namespaces:
        builder = ExpatBuilderNS()
    else:
        builder = ExpatBuilder()
    return builder.parseString(string)


def parseFragment(file, context, namespaces=True):
    """Parse a fragment of a document, given the context from which it
    was originally extracted.  context should be the parent of the
    node(s) which are in the fragment.

    'file' may be either a file name (a str or an os.PathLike object)
    or an open file object.  A file name is opened in binary mode and
    closed again before returning.
    """
    import os  # local import; only needed for the PathLike check

    if namespaces:
        builder = FragmentBuilderNS(context)
    else:
        builder = FragmentBuilder(context)

    if isinstance(file, (str, os.PathLike)):
        with open(file, 'rb') as fp:
            result = builder.parseFile(fp)
    else:
        result = builder.parseFile(file)
    return result


def parseFragmentString(string, context, namespaces=True):
    """Parse a fragment of a document from a string, given the context
    from which it was originally extracted.  context should be the
    parent of the node(s) which are in the fragment.
    """
    if namespaces:
        builder = FragmentBuilderNS(context)
    else:
        builder = FragmentBuilder(context)
    return builder.parseString(string)


def makeBuilder(options):
    """Create a builder based on an Options object."""
    if options.namespaces:
        return ExpatBuilderNS(options)
    else:
        return ExpatBuilder(options)